Load in the necessary libraries

library(readxl)
library(ggplot2)
library(DT)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)
library(rjson)
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.12.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite:
## Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##   genomic data. Bioinformatics 2016.
## 
## The new InteractiveComplexHeatmap package can directly export static 
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(GO.db)
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## 
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## The following object is masked from 'package:grDevices':
## 
##     windows
## 
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
## 
##     select
## 
library(KEGGREST)
# library(KEGG.db)
library(fgsea)
library(org.Hs.eg.db)
## 
library(ggpubr)
library(DESeq2)
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## The following object is masked from 'package:dplyr':
## 
##     count
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## The following object is masked from 'package:Biobase':
## 
##     rowMedians
library(ggrepel)
library(ggprism)
## Warning: package 'ggprism' was built under R version 4.2.1
library(rstatix)
## 
## Attaching package: 'rstatix'
## The following object is masked from 'package:AnnotationDbi':
## 
##     select
## The following object is masked from 'package:IRanges':
## 
##     desc
## The following object is masked from 'package:stats':
## 
##     filter
library(ggsignif)
mod_path <- function(path){ # requires path to be pulled from external mounted through wsl
  str_replace_all(path,"/mnt/f","F:")
}

Exploring the sample metadata via the study manifest

finding samples that did not get aligned on JHPCE for some reason

manifest <- read_excel(mod_path("/mnt/f/Valsamo/manifest.xlsx"))
files_to_remove <- c("hg19MTERCC-ensembl75-genes-Q21777-Plate-1-E06_L65",
"hg19MTERCC-ensembl75-genes-Q21777-Plate-1-F12_L1.D707_508",
"hg19MTERCC-ensembl75-genes-Q23152+B4+H2+AG710464_L1.D705")
manifest <- manifest %>% dplyr::filter(!(Sample %in% files_to_remove))
manifest$Sample <- str_replace_all(manifest$Sample,"hg19MTERCC-ensembl75-genes-","")
fastq_files <- read.table(mod_path("/mnt/f/Valsamo/fastq_files.txt"))
SJ_files <- read.table(mod_path("/mnt/f/Valsamo/SJ_files.txt"))
SJ_files$sample_name <- vapply(SJ_files$V1,function(file){
  str_remove(file,"SJ.out.tab")
},character(1))
fastq_files$sample_name <- vapply(fastq_files$V1,function(file){
  str_remove(file,"_1.clipped.fastq.gz")
},character(1))
missing_files <- data.frame(sprintf(mod_path("/dcs04/fertig/data/theron/share/%s",fastq_files[which(!(fastq_files$sample_name %in% SJ_files$sample_name)),"V1"])))
write.table(missing_files,file=mod_path("/mnt/f/Valsamo/missing_fasta.txt"),
            quote=F, col.names = F, row.names = F, sep = "\t")

Looking at response rates across treatment group

RESIST 1.1 Progression Terms form: https://project.eortc.org/recist/wp-content/uploads/sites/4/2015/03/RECISTGuidelines.pdf

CR = Complete Response
PD = Progressive Disease
PR = Partial Response
SD = Stable Disease
NE = ?

ggplot(manifest,aes(x=TRTGRP,y=log2(PFS),fill=TRTGRP))+geom_violin()+geom_point(position = position_jitter(seed = 1, width = 0.2))

ggplot(manifest,aes(x=TRTGRP,y=log2(OS),fill=TRTGRP))+geom_violin()+geom_point(position = position_jitter(seed = 1, width = 0.2))

ggplot(manifest,aes(x=TRTGRP,fill=AX_BOR))+geom_bar(position='dodge')

ggplot(manifest,aes(x=TRTGRP,fill=AX_BOR3))+geom_bar(position='dodge')

ggplot(manifest,aes(x=TRTGRP,y=log2(PFS),fill=AX_TIMETEMP))+geom_violin()+geom_point(position = position_jitter(seed = 1, width = 0.2))

ggplot(manifest,aes(x=TRTGRP,y=log2(OS),fill=AX_TIMETEMP))+geom_violin()+geom_point(position = position_jitter(seed = 1, width = 0.2))

ggplot(manifest,aes(x=AX_BOR,fill=AX_TIMETEMP))+geom_bar(position='dodge')

ggplot(manifest,aes(x=TRTGRP,fill=AX_TIMETEMP))+geom_bar(position='dodge')

datatable(data.frame(table(manifest$AX_TIMETEMP)))

Leafcutter Analysis Preparation before treatment

Comparisons: a) NIV3.NAIVE(PR) vs PD b) NIV3.NAIVE(CR) vs PD c) NIV3.NAIVE(SD) vs PD d) NIV3.NAIVE(NE) vs PD d) NIV3.PROG(PR) vs PD e) NIV3.PROG(CR) vs PD f) NIV3.PROG(SD) vs PD g) NIV3.PROG(SD) vs PD h) NIV1.IPI3(PR) vs PD
i) NIV1.IPI3(SD) vs PD
j) NIV1.IPI3(NE) vs PD k) PD vs RESP

Forming groups_file.txt and junc_file.txt for each comparison before treatment

NIV3_NAIVE <- manifest[which(manifest$TRTGRP == "NIV3-NAIVE" & manifest$AX_TIMETEMP=="PRE"),]
NIV3_PROG <- manifest[which(manifest$TRTGRP == "NIV3-PROG" & manifest$AX_TIMETEMP=="PRE"),]
NIV1_IPI3 <- manifest[which(manifest$TRTGRP %in% c("NIV1+IPI3 P2","NIV1+IPI3 P3") & manifest$AX_TIMETEMP=="PRE"),]
PD <- c(NIV3_NAIVE$Sample[NIV3_NAIVE$AX_BOR=="PD"],NIV3_PROG$Sample[NIV3_PROG$AX_BOR=="PD"],NIV1_IPI3$Sample[NIV1_IPI3$AX_BOR=="PD"])
RESP <- c(NIV3_NAIVE$Sample[NIV3_NAIVE$AX_BOR!="PD" & NIV3_NAIVE$AX_BOR!="NE"],
          NIV3_PROG$Sample[NIV3_PROG$AX_BOR!="PD" & NIV3_PROG$AX_BOR!="NE"],
          NIV1_IPI3$Sample[NIV1_IPI3$AX_BOR!="PD" & NIV1_IPI3$AX_BOR!="NE"])
groups_and_junc_dir <- mod_path("/mnt/f/Valsamo/leafcutter_prep/run_12072021")
JHPCE_dir <- "/dcs04/fertig/data/theron/share/juncs"
comparisons <- list()
state <- "PRE"

NIV3.NAIVE(PR) vs PD

tar<-"PR"
comp<-"PD"
tar_samp<-"NIV3_NAIVE"
comp_samp<-"PD"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV3_NAIVE$Sample[NIV3_NAIVE$AX_BOR == tar]
comparators<-PD
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators


for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s_%s.txt",groups_and_junc_dir,target,comparison,state)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}
comparisons[[sprintf("%s_%s",comparison,state)]] <- a

NIV3.NAIVE(CR) vs PD

tar<-"CR"
comp<-"PD"
tar_samp<-"NIV3_NAIVE"
comp_samp<-"PD"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV3_NAIVE$Sample[NIV3_NAIVE$AX_BOR == tar]
comparators<-PD
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s_%s.txt",groups_and_junc_dir,target,comparison,state)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}
comparisons[[sprintf("%s_%s",comparison,state)]] <- a

NIV3.NAIVE(SD) vs PD

tar<-"SD"
comp<-"PD"
tar_samp<-"NIV3_NAIVE"
comp_samp<-"PD"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV3_NAIVE$Sample[NIV3_NAIVE$AX_BOR == tar]
comparators<-PD
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s_%s.txt",groups_and_junc_dir,target,comparison,state)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}
comparisons[[sprintf("%s_%s",comparison,state)]] <- a

NIV3.NAIVE(NE) vs PD

tar<-"NE"
comp<-"PD"
tar_samp<-"NIV3_NAIVE"
comp_samp<-"PD"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV3_NAIVE$Sample[NIV3_NAIVE$AX_BOR == tar]
comparators<-PD
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators


for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s_%s.txt",groups_and_junc_dir,target,comparison,state)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}
comparisons[[sprintf("%s_%s",comparison,state)]] <- a

NIV3.PROG(PR) vs PD

tar<-"PR"
comp<-"PD"
tar_samp <- "NIV3_PROG"
comp_samp<-"PD"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV3_PROG$Sample[NIV3_PROG$AX_BOR == tar]
comparators<-PD
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s_%s.txt",groups_and_junc_dir,target,comparison,state)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}
comparisons[[sprintf("%s_%s",comparison,state)]] <- a

NIV3.PROG(CR) vs PD

tar<-"CR"
comp<-"PD"
tar_samp <- "NIV3_PROG"
comp_samp<-"PD"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV3_PROG$Sample[NIV3_PROG$AX_BOR == tar]
comparators<-PD
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s_%s.txt",groups_and_junc_dir,target,comparison,state)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}
comparisons[[sprintf("%s_%s",comparison,state)]] <- a

NIV3.PROG(SD) vs PD

tar<-"SD"
comp<-"PD"
tar_samp <- "NIV3_PROG"
comp_samp<-"PD"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV3_PROG$Sample[NIV3_PROG$AX_BOR == tar]
comparators<-PD
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s_%s.txt",groups_and_junc_dir,target,comparison,state)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}
comparisons[[sprintf("%s_%s",comparison,state)]] <- a

NIV3.PROG(NE) vs PD

tar<-"NE"
comp<-"PD"
tar_samp <- "NIV3_PROG"
comp_samp<-"PD"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV3_PROG$Sample[NIV3_PROG$AX_BOR == tar]
comparators<-PD
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s_%s.txt",groups_and_junc_dir,target,comparison,state)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}
comparisons[[sprintf("%s_%s",comparison,state)]] <- a

NIV1.IPI3(PR) vs PD

tar<-"PR"
comp<-"PD"
tar_samp <- "NIV1_IPI3"
comp_samp<-"PD"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV1_IPI3$Sample[NIV1_IPI3$AX_BOR == tar]
comparators<-PD
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s_%s.txt",groups_and_junc_dir,target,comparison,state)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}
comparisons[[sprintf("%s_%s",comparison,state)]] <- a

NIV1.IPI3(SD) vs PD

tar<-"SD"
comp<-"PD"
tar_samp <- "NIV1_IPI3"
comp_samp<-"PD"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV1_IPI3$Sample[NIV1_IPI3$AX_BOR == tar]
comparators<-PD
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s_%s.txt",groups_and_junc_dir,target,comparison,state)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}
comparisons[[sprintf("%s_%s",comparison,state)]] <- a

NIV1.IPI3(NE) vs PD

tar<-"NE"
comp<-"PD"
tar_samp <- "NIV1_IPI3"
comp_samp<-"PD"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV1_IPI3$Sample[NIV1_IPI3$AX_BOR == tar]
comparators<-PD
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s_%s.txt",groups_and_junc_dir,target,comparison,state)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}
comparisons[[sprintf("%s_%s",comparison,state)]] <- a

PD vs RESP

tar<-"PD"
comp<-"RESP"
tar_samp <- "PD"
comp_samp<-"RESP"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-PD
comparators<-RESP
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s_%s.txt",groups_and_junc_dir,target,comparison,state)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}
comparisons[[sprintf("%s_%s",comparison,state)]] <- a

Leafcutter Analysis Preparation after treatment

Comparisons: a) NIV3.NAIVE(PR) vs PD b) NIV3.NAIVE(CR) vs PD c) NIV3.NAIVE(SD) vs PD d) NIV3.NAIVE(NE) vs PD d) NIV3.PROG(PR) vs PD e) NIV3.PROG(CR) vs PD f) NIV3.PROG(SD) vs PD g) NIV3.PROG(SD) vs PD h) NIV1.IPI3(PR) vs PD
i) NIV1.IPI3(SD) vs PD
j) NIV1.IPI3(NE) vs PD k) PD vs RESP

Forming groups_file.txt and junc_file.txt for each comparison after treatment

NIV3_NAIVE <- manifest[which(manifest$TRTGRP == "NIV3-NAIVE" & manifest$AX_TIMETEMP=="POST"),]
NIV3_PROG <- manifest[which(manifest$TRTGRP == "NIV3-PROG" & manifest$AX_TIMETEMP=="POST"),]
NIV1_IPI3 <- manifest[which(manifest$TRTGRP %in% c("NIV1+IPI3 P2","NIV1+IPI3 P3") & manifest$AX_TIMETEMP=="POST"),]
PD <- c(NIV3_NAIVE$Sample[NIV3_NAIVE$AX_BOR=="PD"],NIV3_PROG$Sample[NIV3_PROG$AX_BOR=="PD"],NIV1_IPI3$Sample[NIV1_IPI3$AX_BOR=="PD"])
RESP <- c(NIV3_NAIVE$Sample[NIV3_NAIVE$AX_BOR!="PD" & NIV3_NAIVE$AX_BOR!="NE"],
          NIV3_PROG$Sample[NIV3_PROG$AX_BOR!="PD" & NIV3_PROG$AX_BOR!="NE"],
          NIV1_IPI3$Sample[NIV1_IPI3$AX_BOR!="PD" & NIV1_IPI3$AX_BOR!="NE"])
groups_and_junc_dir <- mod_path("/mnt/f/Valsamo/leafcutter_prep/run_12072021")
JHPCE_dir <- "/dcs04/fertig/data/theron/share/juncs"
state <- "POST"

NIV3.NAIVE(PR) vs PD

tar<-"PR"
comp<-"PD"
tar_samp<-"NIV3_NAIVE"
comp_samp<-"PD"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV3_NAIVE$Sample[NIV3_NAIVE$AX_BOR == tar]
comparators<-PD
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s_%s.txt",groups_and_junc_dir,target,comparison,state)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}
comparisons[[sprintf("%s_%s",comparison,state)]] <- a

NIV3.NAIVE(CR) vs PD

tar<-"CR"
comp<-"PD"
tar_samp<-"NIV3_NAIVE"
comp_samp<-"PD"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV3_NAIVE$Sample[NIV3_NAIVE$AX_BOR == tar]
comparators<-PD
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s_%s.txt",groups_and_junc_dir,target,comparison,state)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}
comparisons[[sprintf("%s_%s",comparison,state)]] <- a

NIV3.NAIVE(SD) vs PD

tar<-"SD"
comp<-"PD"
tar_samp<-"NIV3_NAIVE"
comp_samp<-"PD"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV3_NAIVE$Sample[NIV3_NAIVE$AX_BOR == tar]
comparators<-PD
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s_%s.txt",groups_and_junc_dir,target,comparison,state)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}
comparisons[[sprintf("%s_%s",comparison,state)]] <- a

NIV3.NAIVE(NE) vs PD

tar<-"NE"
comp<-"PD"
tar_samp<-"NIV3_NAIVE"
comp_samp<-"PD"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV3_NAIVE$Sample[NIV3_NAIVE$AX_BOR == tar]
comparators<-PD
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s_%s.txt",groups_and_junc_dir,target,comparison,state)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}
comparisons[[sprintf("%s_%s",comparison,state)]] <- a

NIV3.PROG(PR) vs PD

tar<-"PR"
comp<-"PD"
tar_samp <- "NIV3_PROG"
comp_samp<-"PD"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV3_PROG$Sample[NIV3_PROG$AX_BOR == tar]
comparators<-PD
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s_%s.txt",groups_and_junc_dir,target,comparison,state)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}
comparisons[[sprintf("%s_%s",comparison,state)]] <- a

NIV3.PROG(CR) vs PD

tar<-"CR"
comp<-"PD"
tar_samp <- "NIV3_PROG"
comp_samp<-"PD"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV3_PROG$Sample[NIV3_PROG$AX_BOR == tar]
comparators<-PD
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s_%s.txt",groups_and_junc_dir,target,comparison,state)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}
comparisons[[sprintf("%s_%s",comparison,state)]] <- a

NIV3.PROG(SD) vs PD

tar<-"SD"
comp<-"PD"
tar_samp <- "NIV3_PROG"
comp_samp<-"PD"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV3_PROG$Sample[NIV3_PROG$AX_BOR == tar]
comparators<-PD
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s_%s.txt",groups_and_junc_dir,target,comparison,state)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}
comparisons[[sprintf("%s_%s",comparison,state)]] <- a

NIV3.PROG(NE) vs PD

tar<-"NE"
comp<-"PD"
tar_samp <- "NIV3_PROG"
comp_samp<-"PD"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV3_PROG$Sample[NIV3_PROG$AX_BOR == tar]
comparators<-PD
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s_%s.txt",groups_and_junc_dir,target,comparison,state)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}
comparisons[[sprintf("%s_%s",comparison,state)]] <- a

NIV1.IPI3(PR) vs PD

tar<-"PR"
comp<-"PD"
tar_samp <- "NIV1_IPI3"
comp_samp<-"PD"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV1_IPI3$Sample[NIV1_IPI3$AX_BOR == tar]
comparators<-PD
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s_%s.txt",groups_and_junc_dir,target,comparison,state)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}
comparisons[[sprintf("%s_%s",comparison,state)]] <- a

NIV1.IPI3(SD) vs PD

tar<-"SD"
comp<-"PD"
tar_samp <- "NIV1_IPI3"
comp_samp<-"PD"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV1_IPI3$Sample[NIV1_IPI3$AX_BOR == tar]
comparators<-PD
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s_%s.txt",groups_and_junc_dir,target,comparison,state)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}
comparisons[[sprintf("%s_%s",comparison,state)]] <- a

NIV1.IPI3(NE) vs PD

tar<-"NE"
comp<-"PD"
tar_samp <- "NIV1_IPI3"
comp_samp<-"PD"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV1_IPI3$Sample[NIV1_IPI3$AX_BOR == tar]
comparators<-PD
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s_%s.txt",groups_and_junc_dir,target,comparison,state)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}
comparisons[[sprintf("%s_%s",comparison,state)]] <- a

PD vs RESP

tar<-"PD"
comp<-"RESP"
tar_samp <- "PD"
comp_samp<-"RESP"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-PD
comparators<-RESP
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s_%s.txt",groups_and_junc_dir,target,comparison,state)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}
comparisons[[sprintf("%s_%s",comparison,state)]] <- a

saveRDS(comparisons,file=mod_path("/mnt/f/Valsamo/analysis/leafcutterMD/run_12072021/comparisons.rds"))
comp_frame <- data.frame(names(comparisons))
write.table(comp_frame,
            file=mod_path("/mnt/f/Valsamo/analysis/leafcutterMD/run_12072021/comparisons.txt"),
            quote=F,
            sep="\t",
            col.names=F,
            row.names=F)

Processing comparison juncs

comparison_junctions <- list()
leafcutter_files <- read.table(mod_path("/mnt/f/Valsamo/analysis/leafcutterMD/run_12072021/filenames.txt"))
for (i in seq(nrow(leafcutter_files))){
  print(i)
  outlier_file <- leafcutter_files[i,]
  outlier_juncs <- read.table(outlier_file)
  outlier_junc_cols <- str_replace_all(colnames(outlier_juncs),".filt","")
  colnames(outlier_juncs) <- str_replace_all(outlier_junc_cols,"[-_+.]","_")
  file_split <- strsplit(outlier_file,"_juncs_")[[1]]
  sample <- basename(file_split[1])
  sample <- substr(sample,1,nchar(sample))
  sample<-str_replace_all(sample,"[-_+.]","_")
  arm_info <- strsplit(file_split[2],"_")[[1]]
  tar_samp <- sprintf("%s_%s",arm_info[1],arm_info[2])
  comp_samp <- arm_info[3]
  if (length(arm_info) == 6){
    tar_samp <- arm_info[1]
    comp_samp <- arm_info[2]
    tar<-arm_info[3]
    comp<-arm_info[4]
    time<-arm_info[5]
  } else {
    tar_samp <- sprintf("%s_%s",arm_info[1],arm_info[2])
    comp_samp <- arm_info[3]
    tar <- arm_info[4]
    comp <- arm_info[5]
    time <- arm_info[6]
  }
  comparison <- sprintf("%s_%s_%s_%s_%s",tar_samp,comp_samp,tar,comp,time)
  sig_juncs <- rownames(outlier_juncs)[as.numeric(outlier_juncs[,sample]) <=0.05]
  comparison_junctions[[comparison]] <- unique(c(comparison_junctions[[comparison]],sig_juncs))
}
comparison_juncs_linear <- data.frame(t(vapply(unname(unlist(comparison_junctions)),function(junc){
  junc_vals<-strsplit(junc,":")[[1]]
  strand<-strsplit(junc_vals[4],"_")[[1]][3]
  c(junc_vals[1],junc_vals[2],junc_vals[3],strand)
},character(4))))
rownames(comparison_juncs_linear)<-seq(nrow(comparison_juncs_linear))
colnames(comparison_juncs_linear)<-c("chr","start","end","strand")
comparison_juncs_linear<-unique(comparison_juncs_linear)


iter<-1
total_juncs <- nrow(comparison_juncs_linear)
for (i in seq(1,total_juncs,5000)){
  if (i+4999 >= total_juncs){
    end = total_juncs
  } else {
    end = i+4999
  }
  comparison_juncs_small <- comparison_juncs_linear[seq(i,end),]
  saveRDS(comparison_juncs_small,
          sprintf(mod_path("/mnt/f/Valsamo/analysis/leafcutterMD/run_12072021/comparison_juncs_linear_%d.rds",iter)))
  iter<-iter+1
}


saveRDS(comparison_juncs_linear,file=mod_path("/mnt/f/Valsamo/analysis/leafcutterMD/run_12072021/comparison_juncs_linear.rds"))
saveRDS(comparison_junctions,file=mod_path("/mnt/f/Valsamo/analysis/leafcutterMD/run_12072021/comparison_juncs.rds"))

Calculating junction usage and coding potential characteristics

Processing the gene metric data

Mean processing

comparisons_file <- mod_path("/mnt/f/Valsamo/analysis/JHPCE/create_comparisons/comparisons.rds")
comparison_dat <- readRDS(comparisons_file)

gene_metric_files <- mod_path("/mnt/f/Valsamo/analysis/JHPCE/calc_gene_metric_out_cp/gene_metric_files_mean.txt")
gene_metric_comp<-read.table(gene_metric_files,header=F)
total_genes<-c()
all_comps <- list()
samples <- c()
comparisons <- c()
comp_genes <- list()
for (comp in gene_metric_comp$V1){
  comp_dir <- dirname(comp)
  comp_vals <- readRDS(mod_path(comp))
  comp <- str_replace(basename(comp),"_gene_metric_mean_len_norm.rds","")
  coding_potential_file <- sprintf("%s/%s_coding_potential_LGC.rds",comp_dir,comp)
  coding_potential_LGC <- readRDS(mod_path(coding_potential_file))
  HIGH_CP_GENES <- rownames(coding_potential_LGC)[coding_potential_LGC$coding_potential>0]
  comp_split <- strsplit(comp,"_")[[1]]
  targets <- comparison_dat[[comp]]$targets
  targets <- targets[which(targets %in% colnames(comp_vals))]
  comp_vals <- comp_vals[HIGH_CP_GENES,targets,drop=F]
  targets <-   sprintf("%s_%s",targets,comp_split[length(comp_split)])
  # print(comp)
  # print(targets)
  genes <- rownames(comp_vals)
  comp_genes[[comp]] <- unique(genes)
  total_genes<-unique(c(total_genes,genes))
  all_comps[[comp]] <- comp_vals
  samples <- c(samples,targets)
  comparisons <- c(comparisons,rep(comp,ncol(comp_vals)))
}

Mean comparisons all pre-processing

comp_vals_all <- data.frame(total_genes)
rownames(comp_vals_all)<-total_genes
for (comp in names(all_comps)){
  comp_vals <- all_comps[[comp]]
  comp_split <- strsplit(comp,"_")[[1]]
  comp_vals_all[rownames(comp_vals),sprintf("%s_%s",colnames(comp_vals),comp_split[length(comp_split)])]<-comp_vals
}
comp_vals_all <- comp_vals_all[,seq(2,ncol(comp_vals_all))]

col_data <- data.frame(samples,comparisons)
col_data$time <- vapply(col_data$comparisons,function(comp){
  a<-strsplit(comp,"_")[[1]]
  return(a[length(a)])
},character(1))
col_data$comp <- vapply(col_data$comparisons,function(comp_val){
  a<-str_split(comp_val,"_")[[1]]
  return(paste(a[seq(length(a)-1)],collapse="_"))
},character(1))
col_data$sample <- vapply(col_data$samples,function(samp){
  a<-str_split(samp,"_")[[1]]
  return(paste(a[seq(length(a)-1)],collapse="_"))
},character(1))
col_data$subj_id <- vapply(col_data$sample,function(samp){
  s<<-samp
  manifest$AX_USUBJID[manifest$Sample == samp]
},character(1))
col_data$response <- vapply(col_data$comp,function(comp){
  a<-str_split(comp,"_")[[1]]
  b<-a[length(a)-1]
  if (b %in% c("CR","PR")){
    b <- "CR_PR"
  }
  return(b)
},character(1))
col_data$comparator <- vapply(col_data$comp,function(comp){
  a<-str_split(comp,"_")[[1]]
  b<-a[length(a)]
  if (b %in% c("CR","PR")){
    b <- "CR_PR"
  }
  return(b)
},character(1))
col_data$PFS <- vapply(col_data$subj_id,function(subject){
  return(manifest$PFS[manifest$AX_USUBJID == subject][1])
},numeric(1))
col_data$OS <- vapply(col_data$subj_id,function(subject){
  return(manifest$OS[manifest$AX_USUBJID == subject][1])
},numeric(1))
col_data$treatment <- vapply(col_data$subj_id,function(subject){
  return(str_split(manifest$TRTGRP[manifest$AX_USUBJID == subject][1]," ")[[1]][1])
},character(1))
col_data <- col_data %>% dplyr::filter(col_data$samples %in% colnames(comp_vals_all))

comp_vals_all[is.na(comp_vals_all)]<-0
comp_vals_all <- comp_vals_all[apply(comp_vals_all,1,sd)!=0,]
# all(col_data$samples == colnames(comp_vals_all)) == TRUE

col_data <- col_data[order(col_data$comparisons),]
comp_vals_all <- comp_vals_all[,order(col_data$comparisons)]

PRE_cols <- which(str_detect(colnames(comp_vals_all),"PRE"))
POST_cols <- which(str_detect(colnames(comp_vals_all),"POST"))

comp_vals_PRE <- comp_vals_all[,PRE_cols]
col_data_PRE <- col_data[PRE_cols,]
PRE_cols <- which(col_data_PRE$response != "NE")
comp_vals_PRE <- comp_vals_PRE[,PRE_cols]
col_data_PRE <- col_data_PRE[PRE_cols,]

comp_vals_POST <- comp_vals_all[,POST_cols]
col_data_POST <- col_data[POST_cols,]
POST_cols <- which(col_data_POST$response != "NE")
comp_vals_POST <- comp_vals_POST[,POST_cols]
col_data_POST <- col_data_POST[POST_cols,]

color_vals <- c("red","green","blue","red","green","blue")
names(color_vals)<-c("CR_PR","SD","PD","NIV1+IPI3","NIV3-NAIVE","NIV3-PROG")

Proprocessing junctions per comparison

calc_mean_CP <- function(splice_dat,comp_str){
  juncs <- unique(splice_dat$juncs)
  a<-vapply(juncs,function(junc){
    mean(splice_dat[splice_dat$juncs==junc,"coding_potential_LGC"],na.rm=T)
  },numeric(1))
  junc_LGC <- data.frame(LGC = a)
  junc_LGC$comp <- comp_str
  return(junc_LGC)
}
calc_junc_types <- function(splice_dat,comp_str){
  junc_types <- data.frame(table(splice_dat$error))
  junc_types$comp <- comp_str
  total <- sum(junc_types$Freq)
  junc_types$Freq <- junc_types$Freq/total
  return(junc_types)
}

comparisons_file <- mod_path("/mnt/f/Valsamo/analysis/JHPCE/create_comparisons/comparisons.rds")
comparison_dat <- readRDS(comparisons_file)
splice_dat_files <-  read.table(mod_path("/mnt/f/Valsamo/analysis/JHPCE/create_comparisons_out_cp/filenames.txt"))


comparisons <- c()
for (val in seq(length(splice_dat_files$V1))){
  print(val)
  comp<-splice_dat_files$V1[val]
  splice_dat <- readRDS(mod_path(comp))
  comp_dir <- dirname(comp)
  splice_dat_comp <- readRDS(mod_path(comp))
  comp_str <- str_replace(basename(comp),"splice_dat_","")
  comp_str <- str_replace(basename(comp_str),".rds","")
  comparisons <- c(comparisons,comp_str)
  if (val == 1){
    splice_dat_CP_LGC <- calc_mean_CP(splice_dat_comp,comp_str)
    all_junc_types <- calc_junc_types(splice_dat_comp,comp_str)
  } else {
    splice_dat_CP_LGC <- rbind(splice_dat_CP_LGC,calc_mean_CP(splice_dat_comp,comp_str))
    all_junc_types <- rbind(all_junc_types,calc_junc_types(splice_dat_comp,comp_str))
  }
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
## [1] 21
## [1] 22
times <- vapply(comparisons,function(comp){
  a<-strsplit(comp,"_")[[1]]
  return(a[length(a)])
},character(1))
comps <- vapply(comparisons,function(comp_val){
  a<-str_split(comp_val,"_")[[1]]
  return(paste(a[seq(length(a)-1)],collapse="_"))
},character(1))
responses <- vapply(comparisons,function(comp){
  a<-str_split(comp,"_")[[1]]
  b<-a[length(a)-2]
  if (b %in% c("CR","PR")){
    b <- "CR_PR"
  }
  return(b)
},character(1))
comparators <- vapply(comparisons,function(comp){
  a<-str_split(comp,"_")[[1]]
  b<-a[length(a)-1]
  if (b %in% c("CR","PR")){
    b <- "CR_PR"
  }
  return(b)
},character(1))
treatments <- vapply(comparisons,function(comp){
  a<-str_split(comp,"_")[[1]]
  b<-paste(a[seq(2)],collapse="_")
  return(b)
},character(1))
metadata <- data.frame(comp=comps,time=times,response=responses,comparator=comparators,treatment=treatments)
metadata$comparison <- sprintf("%s vs %s",metadata$response,metadata$comparator)
splice_dat_CP_LGC <- cbind(splice_dat_CP_LGC,metadata[splice_dat_CP_LGC$comp,seq(2,ncol(metadata))])
all_junc_types <- cbind(all_junc_types,metadata[all_junc_types$comp,seq(2,ncol(metadata))])

all_junc_types$time <- factor(all_junc_types$time,levels=c("PRE","POST"))
all_junc_types$error <- factor(vapply(all_junc_types$Var1,function(val){
  if (val == "start_not_beg"){
    return("late_start")
  } else if (val == "start_not_beg:stop_not_end"){
    return("late_start+early_stop")
  } else if (val == "stop_not_end"){
    return("early_stop")
  } else if (val=="tx"){
    return("tx")
  }
},character(1)),levels=c("late_start+early_stop","late_start","early_stop"))

all_junc_types_filt <- all_junc_types[all_junc_types$Var1!="tx",]
all_junc_types_filt$comparison <- sprintf("%s vs %s",all_junc_types_filt$response,all_junc_types_filt$comparator)
all_junc_types_filt_PD <- all_junc_types_filt[all_junc_types_filt$comparison == "PD vs RESP",]
all_junc_types_filt <- all_junc_types_filt[all_junc_types_filt$comparison != "NE vs PD" & all_junc_types_filt$comparison != "PD vs RESP",]

ggplot(all_junc_types_filt,aes(y=Freq,x=time,fill=error))+
  geom_bar(stat="identity")+facet_grid(comparison~treatment)+
  ylab("Junction Frequency")+
  xlab("Treatment Time")

ggplot(all_junc_types_filt_PD,aes(y=Freq,x=time,fill=error))+
  geom_bar(stat="identity")+facet_grid(comparison~.)+
  ylim(c(0,0.06))+
  ylab("Junction Frequency")+
  xlab("Treatment Time")

splice_dat_CP_LGC_filt <- splice_dat_CP_LGC[splice_dat_CP_LGC$comparison != "NE vs PD",]
splice_dat_CP_LGC_filt$resp <- splice_dat_CP_LGC_filt$response
splice_dat_CP_LGC_filt_PD <- splice_dat_CP_LGC_filt[splice_dat_CP_LGC_filt$treatment == "PD_RESP",]
splice_dat_CP_LGC_filt <- splice_dat_CP_LGC_filt[splice_dat_CP_LGC_filt$treatment!="PD_RESP",]
splice_dat_CP_LGC_filt$time <- factor(splice_dat_CP_LGC_filt$time,levels=c("PRE","POST"))
wilcox_test_val_all <- compare_means(LGC ~ time, p.adjust.method = "BH", data=splice_dat_CP_LGC_filt,group.by = c("response","treatment"))
ggplot(splice_dat_CP_LGC_filt,aes(x=time,y=LGC))+
  geom_violin()+
  facet_grid(treatment~response)+
  add_pvalue(wilcox_test_val_all, label = "p.signif", remove.bracket = TRUE,y.position=33)+
  ylab("Splicing Antigenicity")+
  xlab("Treatment Time")+
  ylim(c(-2,35))

ggplot(splice_dat_CP_LGC_filt,aes(x=time,y=LGC))+
  geom_boxplot()+
  facet_grid(treatment~response)+
  add_pvalue(wilcox_test_val_all, label = "p.signif", remove.bracket = TRUE,y.position=33)+
  ylab("Splicing Antigenicity")+
  xlab("Treatment Time")+
  ylim(c(-2,35))

wilcox_test_val_all <- compare_means(LGC ~ time, p.adjust.method = "BH", data=splice_dat_CP_LGC_filt_PD,group.by = c("response","treatment"))
ggplot(splice_dat_CP_LGC_filt_PD,aes(x=time,y=LGC))+
  geom_violin()+
  facet_grid(treatment~.)+
  add_pvalue(wilcox_test_val_all, label = "p.signif", remove.bracket = TRUE,y.position=120)+
  ylab("Splicing Antigenicity")+
  xlab("Treatment Time")

ggplot(splice_dat_CP_LGC_filt_PD,aes(x=time,y=LGC))+
  geom_boxplot()+
  facet_grid(treatment~.)+
  add_pvalue(wilcox_test_val_all, label = "p.signif", remove.bracket = TRUE,y.position=120)+
  ylab("Splicing Antigenicity")+
  xlab("Treatment Time")

splice_dat_CP_LGC_filt <- splice_dat_CP_LGC[splice_dat_CP_LGC$comparison != "NE vs PD",]
splice_dat_CP_LGC_filt$resp <- splice_dat_CP_LGC_filt$response
splice_dat_CP_LGC_filt_PD <- splice_dat_CP_LGC_filt[splice_dat_CP_LGC_filt$treatment == "PD_RESP",]
splice_dat_CP_LGC_filt <- splice_dat_CP_LGC_filt[splice_dat_CP_LGC_filt$treatment!="PD_RESP",]
splice_dat_CP_LGC_filt$time <- factor(splice_dat_CP_LGC_filt$time,levels=c("PRE","POST"))
wilcox_test_val_all <- compare_means(LGC ~ response, p.adjust.method = "BH", data=splice_dat_CP_LGC_filt,group.by = c("treatment","time"))
# wilcox_test_val_all$y.position <- c(35,33,31,35,33,31,35,33,31,35,33,31,35,33,31,35,33,3)
ggplot(splice_dat_CP_LGC_filt,aes(x=response,y=LGC))+
  geom_violin()+
  facet_grid(time~treatment)+
  add_pvalue(wilcox_test_val_all, label = "p.signif", remove.bracket = TRUE,y.position=30)+
  ylab("Coding Potential")+
  xlab("Response")

ggplot(splice_dat_CP_LGC_filt,aes(x=response,y=LGC))+
  geom_boxplot()+
  facet_grid(time~treatment)+
  add_pvalue(wilcox_test_val_all, label = "p.signif", remove.bracket = TRUE,y.position=30)+
  ylab("Coding Potential")+
  xlab("Response")

PRE Heatmap

comp_vals_PRE <- comp_vals_PRE[apply(comp_vals_PRE,1,sd)!=0,]
Heatmap(t(apply(as.matrix(comp_vals_PRE),1,scale)),
    top_annotation = HeatmapAnnotation(PFS=anno_barplot(col_data_PRE$PFS),
                                       response=col_data_PRE$response,
                                       treatment=col_data_PRE$treatment,
                                       col=list(response=c("CR_PR"="red","SD"="green","PD"="blue"),
                                                treatment=c("NIV1+IPI3"="red","NIV3-NAIVE"="green","NIV3-PROG"="blue"))),
    bottom_annotation = HeatmapAnnotation(OS=anno_barplot(col_data_PRE$OS)),
    show_row_names=F,
    show_column_names = F,
    cluster_rows=F,
    cluster_columns=F)
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.

Heatmap(comp_vals_PRE,
    top_annotation = HeatmapAnnotation(PFS=anno_barplot(col_data_PRE$PFS),
                                       response=col_data_PRE$response,
                                       treatment=col_data_PRE$treatment,
                                       col=list(response=c("CR_PR"="red","SD"="green","PD"="blue"),
                                                treatment=c("NIV1+IPI3"="red","NIV3-NAIVE"="green","NIV3-PROG"="blue"))),
    bottom_annotation = HeatmapAnnotation(OS=anno_barplot(col_data_PRE$OS)),
    show_row_names=F,
    show_column_names = F,
    cluster_rows=F,
    cluster_columns=F)
## Warning: The input is a data frame-like object, convert it to a matrix.
## The automatically generated colors map from the 1^st and 99^th of the
## values in the matrix. There are outliers in the matrix whose patterns
## might be hidden by this color mapping. You can manually set the color
## to `col` argument.
## 
## Use `suppressMessages()` to turn off this message.
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.

POST Heatmap

comp_vals_POST <- comp_vals_POST[apply(comp_vals_POST,1,sd)!=0,]
Heatmap(t(apply(as.matrix(comp_vals_POST),1,scale)),
    top_annotation = HeatmapAnnotation(PFS=anno_barplot(col_data_POST$PFS),
                                       response=col_data_POST$response,
                                       treatment=col_data_POST$treatment,
                                       col=list(response=c("CR_PR"="red","SD"="green","PD"="blue"),
                                                treatment=c("NIV1+IPI3"="red","NIV3-NAIVE"="green","NIV3-PROG"="blue"))),
    bottom_annotation = HeatmapAnnotation(OS=anno_barplot(col_data_POST$OS)),
    show_row_names=F,
    show_column_names = F,
    cluster_rows=F,
    cluster_columns=F)
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.

Heatmap(comp_vals_POST,
    top_annotation = HeatmapAnnotation(PFS=anno_barplot(col_data_POST$PFS),
                                       response=col_data_POST$response,
                                       treatment=col_data_POST$treatment,
                                       col=list(response=c("CR_PR"="red","SD"="green","PD"="blue"),
                                                treatment=c("NIV1+IPI3"="red","NIV3-NAIVE"="green","NIV3-PROG"="blue"))),
    bottom_annotation = HeatmapAnnotation(OS=anno_barplot(col_data_POST$OS)),
    show_row_names=F,
    show_column_names = F,
    cluster_rows=F,
    cluster_columns=F)
## Warning: The input is a data frame-like object, convert it to a matrix.
## The automatically generated colors map from the 1^st and 99^th of the
## values in the matrix. There are outliers in the matrix whose patterns
## might be hidden by this color mapping. You can manually set the color
## to `col` argument.
## 
## Use `suppressMessages()` to turn off this message.
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.

Pre average mean comparisons

responses<-c("CR_PR","SD","PD")
col_data_PRE <- col_data %>% dplyr::filter(time == "PRE" & response != "NE")

response_summ_PRE_list <- list()
for (i in seq(length(unique(col_data_PRE$treatment)))){
  treat<-unique(col_data_PRE$treatment)[i]
  response_summ_PRE <- data.frame(rownames(comp_vals_all))
  colnames(response_summ_PRE) <- "genes"
  rownames(response_summ_PRE) <- response_summ_PRE$genes
  col_data_PRE_small <- col_data_PRE %>% dplyr::filter(treatment == treat)
  for (resp in responses){
    samps <- col_data_PRE_small$samples[col_data_PRE_small$response==resp]
    if (length(samps)==0){
      response_summ_PRE[,resp] <- NA
    } else {
      comparisons <- col_data_PRE_small$comparisons[col_data_PRE_small$response==resp]
      genes_small <- unique(unlist(lapply(comparisons,function(comp){comp_genes[[comp]]})))
      response_summ_PRE[,resp] <- NA
      response_summ_PRE[genes_small,resp] <-apply(comp_vals_all[genes_small,samps],1,mean,na.rm=T)
      if (i==1 & resp=="CR_PR"){
          comp_vals_linear_fill <- unlist(lapply(samps,function(sample){return(comp_vals_all[genes_small,sample])}))
          comp_vals_linear_fill <- data.frame(SA=comp_vals_linear_fill,genes=rep(genes_small,length(samps)))
          comp_vals_linear_fill$treat <- treat
          comp_vals_linear_fill$response <- resp
          comp_vals_linear_fill$time <- "PRE"
          comp_vals_linear_PRE <- comp_vals_linear_fill
      } else {
          comp_vals_linear_fill <- unlist(lapply(samps,function(sample){return(comp_vals_all[genes_small,sample])}))
          comp_vals_linear_fill <- data.frame(SA=comp_vals_linear_fill,genes=rep(genes_small,length(samps)))
          comp_vals_linear_fill$treat <- treat
          comp_vals_linear_fill$response <- resp
          comp_vals_linear_fill$time <- "PRE"
          comp_vals_linear_PRE <- rbind(comp_vals_linear_PRE,comp_vals_linear_fill)
      }
    }
  }
  response_summ_PRE<-response_summ_PRE[,seq(2,ncol(response_summ_PRE))]
  resp_summ_nrow <- nrow(response_summ_PRE)
  response_summ_PRE_linear <- data.frame(c(response_summ_PRE$SD,response_summ_PRE$CR_PR,response_summ_PRE$PD),
                                     c(rep("SD",resp_summ_nrow),
                                       rep("CR_PR",resp_summ_nrow),
                                       rep("PD",resp_summ_nrow)),
                                       rep(rownames(response_summ_PRE),3))
  colnames(response_summ_PRE_linear) <- c("gene_metric","response","gene")
  response_summ_PRE_linear <- response_summ_PRE_linear[complete.cases(response_summ_PRE_linear),]
  response_summ_PRE_linear$time <- "PRE"
  response_summ_PRE_linear$treat <- treat
  
    print(ggplot(response_summ_PRE_linear,aes(x=gene_metric,group=response,fill=response))+
            geom_density(alpha=0.5)+labs(title=sprintf("PRE: %s",treat))+xlab("Splicing Antigenicity"))
    
  if (i==1){
    response_summ_PRE_all_linear <- response_summ_PRE_linear
  } else {
    response_summ_PRE_all_linear <- rbind(response_summ_PRE_all_linear,response_summ_PRE_linear)
  }
  
  my_comparisons <- list(c("CR_PR","PD"),
                        c("CR_PR","SD"),
                        c("PD","SD"))
  if (i==1){
    wilcox_test_val_PRE <- compare_means(gene_metric ~ response, 
                             comparisons = my_comparisons, p.adjust.method = "BH", data=response_summ_PRE_linear)
    wilcox_test_val_PRE$treat <- treat
    wilcox_test_val_PRE$group1_mean <- vapply(wilcox_test_val_PRE$group1,function(resp){
      mean(response_summ_PRE_linear$gene_metric[response_summ_PRE_linear$response==resp],na.rm=T)
    },numeric(1))
    wilcox_test_val_PRE$group2_mean <- vapply(wilcox_test_val_PRE$group2,function(resp){
      mean(response_summ_PRE_linear$gene_metric[response_summ_PRE_linear$response==resp],na.rm=T)
    },numeric(1))
    wilcox_test_val_PRE_all <- wilcox_test_val_PRE
  } else {
    wilcox_test_val_PRE <- compare_means(gene_metric ~ response, 
                         comparisons = my_comparisons, p.adjust.method = "BH", data=response_summ_PRE_linear)
    wilcox_test_val_PRE$treat <- treat
    wilcox_test_val_PRE$group1_mean <- vapply(wilcox_test_val_PRE$group1,function(resp){
      mean(response_summ_PRE_linear$gene_metric[response_summ_PRE_linear$response==resp],na.rm=T)
    },numeric(1))
    wilcox_test_val_PRE$group2_mean <- vapply(wilcox_test_val_PRE$group2,function(resp){
      mean(response_summ_PRE_linear$gene_metric[response_summ_PRE_linear$response==resp],na.rm=T)
    },numeric(1))
    wilcox_test_val_PRE_all <- rbind(wilcox_test_val_PRE_all,wilcox_test_val_PRE)
  }
  # wilcox_test_val$y.position <- c(max(response_summ_PRE_linear$gene_metric)+(max(response_summ_PRE_linear$gene_metric)/4),
  #                                 max(response_summ_PRE_linear$gene_metric)+(max(response_summ_PRE_linear$gene_metric)/6),
  #                                 max(response_summ_PRE_linear$gene_metric)+(max(response_summ_PRE_linear$gene_metric)/8))
  # print(ggplot(response_summ_PRE_linear,aes(y=gene_metric,x=response))+
  #         geom_boxplot()+
  #         labs(title=sprintf("%s: %s",treat,"PRE"))+
  #         stat_pvalue_manual(wilcox_test_val,
  #                            label="p.adj"))
  # print(ggplot(response_summ_PRE_linear,aes(y=log10(gene_metric+1),x=response,fill=response))+
  #   geom_violin()+
  #   labs(title="PRE")+
  #   stat_compare_means(comparisons = my_comparisons,method = "wilcox.test"))
  # 
  # print(ggplot(response_summ_PRE_linear,aes(x=log10(gene_metric+1),fill=response))+
  #   geom_density(alpha=0.4)+
  #   labs(title="PRE"))
}

wilcox_test_val_PRE_all$G1_ov_G2 <- wilcox_test_val_PRE_all$group1_mean/wilcox_test_val_PRE_all$group2_mean
wilcox_test_val_PRE_all$time <- "PRE"

print(ggplot(response_summ_PRE_all_linear,aes(x=gene_metric,group=response,fill=response))+
        geom_density(alpha=0.5)+labs(title="PRE: All Treatments"))+xlab("Splicing Antigenicity")

Post average mean comparisons

responses<-c("CR_PR","SD","PD")
col_data_POST <- col_data %>% dplyr::filter(time == "POST" & response != "NE")

response_summ_POST_list <- list()
for (i in seq(length(unique(col_data_POST$treatment)))){
  treat<-unique(col_data_POST$treatment)[i]
  response_summ_POST <- data.frame(rownames(comp_vals_all))
  colnames(response_summ_POST) <- "genes"
  rownames(response_summ_POST) <- response_summ_POST$genes
  col_data_POST_small <- col_data_POST %>% dplyr::filter(treatment == treat)
  for (resp in responses){
    samps <- col_data_POST$samples[col_data_POST$response==resp]
    if (length(samps)==0){
      response_summ_POST[,resp] <- NA
    } else {
      comparisons <- col_data_POST_small$comparisons[col_data_POST_small$response==resp]
      genes_small <- unique(unlist(lapply(comparisons,function(comp){comp_genes[[comp]]})))
      response_summ_POST[,resp] <- NA
      response_summ_POST[genes_small,resp] <- apply(comp_vals_all[genes_small,samps],1,mean,na.rm=T)
      response_summ_POST$treat <- treat
      if (i==1 & resp=="CR_PR"){
        comp_vals_linear_fill <- unlist(lapply(samps,function(sample){return(comp_vals_all[genes_small,sample])}))
        comp_vals_linear_fill <- data.frame(SA=comp_vals_linear_fill,genes=rep(genes_small,length(samps)))
        comp_vals_linear_fill$treat <- treat
        comp_vals_linear_fill$response <- resp
        comp_vals_linear_fill$time <- "POST"
        comp_vals_linear_POST <- comp_vals_linear_fill
      } else {
        comp_vals_linear_fill <- unlist(lapply(samps,function(sample){return(comp_vals_all[genes_small,sample])}))
        comp_vals_linear_fill <- data.frame(SA=comp_vals_linear_fill,genes=rep(genes_small,length(samps)))
        comp_vals_linear_fill$treat <- treat
        comp_vals_linear_fill$response <- resp
        comp_vals_linear_fill$time <- "POST"
        comp_vals_linear_POST <- rbind(comp_vals_linear_POST,comp_vals_linear_fill)
      }
    }
  }
  response_summ_POST<-response_summ_POST[,seq(2,ncol(response_summ_POST))]
  resp_summ_nrow <- nrow(response_summ_POST)
  response_summ_POST_linear <- data.frame(c(response_summ_POST$SD,response_summ_POST$CR_PR,response_summ_POST$PD),
                                     c(rep("SD",resp_summ_nrow),
                                       rep("CR_PR",resp_summ_nrow),
                                       rep("PD",resp_summ_nrow)),
                                       rep(rownames(response_summ_POST),3))
  colnames(response_summ_POST_linear) <- c("gene_metric","response","gene")
  response_summ_POST_linear <- response_summ_POST_linear[complete.cases(response_summ_POST_linear),]
  response_summ_POST_linear$time <- "POST"
  response_summ_POST_linear$treat <- treat

  print(ggplot(response_summ_POST_linear,aes(x=gene_metric,group=response,fill=response))+
        geom_density(alpha=0.5)+labs(title=sprintf("POST: %s",treat))+xlab("Splicing Antigenicity"))
      
  if (i==1){
    response_summ_POST_all_linear <- response_summ_POST_linear
  } else {
    response_summ_POST_all_linear <- rbind(response_summ_POST_all_linear,response_summ_POST_linear)
  }
  
  my_comparisons <- list(c("CR_PR","PD"),
                        c("CR_PR","SD"),
                        c("PD","SD"))
    if (i==1){
    wilcox_test_val_POST <- compare_means(gene_metric ~ response, 
                             comparisons = my_comparisons, p.adjust.method = "BH", data=response_summ_POST_linear)
    wilcox_test_val_POST$treat <- treat
    wilcox_test_val_POST$group1_mean <- vapply(wilcox_test_val_POST$group1,function(resp){
      mean(response_summ_POST_linear$gene_metric[response_summ_POST_linear$response==resp],na.rm=T)
    },numeric(1))
    wilcox_test_val_POST$group2_mean <- vapply(wilcox_test_val_POST$group2,function(resp){
      mean(response_summ_POST_linear$gene_metric[response_summ_POST_linear$response==resp],na.rm=T)
    },numeric(1))
    wilcox_test_val_POST_all <- wilcox_test_val_POST
  } else {
    wilcox_test_val_POST <- compare_means(gene_metric ~ response, 
                         comparisons = my_comparisons, p.adjust.method = "BH", data=response_summ_POST_linear)
    wilcox_test_val_POST$treat <- treat
    wilcox_test_val_POST$group1_mean <- vapply(wilcox_test_val_POST$group1,function(resp){
      mean(response_summ_POST_linear$gene_metric[response_summ_POST_linear$response==resp],na.rm=T)
    },numeric(1))
    wilcox_test_val_POST$group2_mean <- vapply(wilcox_test_val_POST$group2,function(resp){
      mean(response_summ_POST_linear$gene_metric[response_summ_POST_linear$response==resp],na.rm=T)
    },numeric(1))
    wilcox_test_val_POST_all <- rbind(wilcox_test_val_POST_all,wilcox_test_val_POST)
  }
  
  # wilcox_test_val$y.position <- c(max(response_summ_POST_linear$gene_metric)+(max(response_summ_POST_linear$gene_metric)/4),
  #                                 max(response_summ_POST_linear$gene_metric)+(max(response_summ_POST_linear$gene_metric)/6),
  #                                 max(response_summ_POST_linear$gene_metric)+(max(response_summ_POST_linear$gene_metric)/8))
  # print(ggplot(response_summ_POST_linear,aes(y=gene_metric,x=response))+
  #   geom_boxplot(aes(fill=response))+
  #   labs(title=sprintf("%s: %s",treat,"POST"))+
  #     stat_pvalue_manual(wilcox_test_val,label="p.adj"))

  # ggplot(response_summ_POST_linear,aes(y=log10(gene_metric+1),x=response,fill=response))+
  #   geom_violin()+
  #   labs(title="POST")+
  #   stat_compare_means(comparisons = my_comparisons,method = "wilcox.test")
  # 
  # ggplot(response_summ_POST_linear,aes(x=log10(gene_metric+1),fill=response))+
  #   geom_density(alpha=0.4)+
  #   labs(title="POST")
}

wilcox_test_val_POST_all$G1_ov_G2 <- wilcox_test_val_POST_all$group1_mean/wilcox_test_val_POST_all$group2_mean
wilcox_test_val_POST_all$time <- "POST"


print(ggplot(response_summ_POST_all_linear,aes(x=gene_metric,group=response,fill=response))+
        geom_density(alpha=0.5)+labs(title="POST: All Treatments"))+xlab("Splicing Antigenicity")

Fig 7: Comparing response before and after treatment

response_summ_all <- rbind(response_summ_PRE_all_linear,response_summ_POST_all_linear)
treats <- unique(response_summ_all$treat)
times <- unique(response_summ_all$time)
responses <-  unique(response_summ_all$response)
my_comparisons <- list(c("CR_PR vs PD","PD vs all responders"),
                      c("CR_PR vs PD","SD vs PD"),
                      c("PD vs all responsders","SD vs PD"))

# response_summ_all$response <- factor(response_summ_all$response,levels=c("CR_PR","SD","PD"))
response_summ_all$time <- factor(response_summ_all$time,levels=c("PRE","POST"))
response_summ_all$response <- vapply(response_summ_all$response,function(respo){
  if (respo=="CR_PR"){
    return("CR_PR vs PD")
  } else if(respo=="SD"){
    return("SD vs PD")
  } else if (respo=="PD"){
    return("PD vs all responders")
  }
},character(1))
response_summ_all$response <- factor(response_summ_all$response,levels=c("CR_PR vs PD","SD vs PD","PD vs all responders"))
# response_summ_all$resp <- factor(response_summ_all$resp,levels=c("CR_PR vs PD","SD vs PD","PD vs all responders"))

comp_val_all_linear <- rbind(comp_vals_linear_PRE,comp_vals_linear_POST)
comp_val_all_linear$response <- vapply(comp_val_all_linear$response,function(respo){
  if (respo=="CR_PR"){
    return("CR_PR vs PD")
  } else if(respo=="SD"){
    return("SD vs PD")
  } else if (respo=="PD"){
    return("PD vs all responders")
  }
},character(1))
wilcox_test_val_genes <- compare_means(SA ~ time, p.adjust.method = "BH", 
                         data=comp_val_all_linear,
                         group.by = c("response","treat","genes"))
sig_genes <- unique(wilcox_test_val_genes$genes[wilcox_test_val_genes$p<=0.05])

wilcox_test_val_genes_resp <- compare_means(SA ~ response, p.adjust.method = "BH", 
                                            data=comp_val_all_linear,group.by = c("time","treat","genes"))
sig_genes_resp <- unique(wilcox_test_val_genes_resp$genes[wilcox_test_val_genes_resp$p.adj<=0.05])
sig_genes_all <- intersect(sig_genes,sig_genes_resp)

response_summ_all_filt <- response_summ_all[response_summ_all$gene %in% sig_genes_all,]
response_summ_all_filt$response <- factor(response_summ_all_filt$response,levels=c("CR_PR vs PD","SD vs PD","PD vs all responders"))
# response_summ_all_filt$resp <- factor(response_summ_all_filt$resp,levels=c("CR_PR vs PD","SD vs PD","PD vs all responders"))
wilcox_test_val_all_first <- compare_means(gene_metric ~ time,
                                           p.adjust.method = "BH", 
                                           data=response_summ_all_filt,
                                           group.by = c("response","treat"))

response_summ_all_filt <- response_summ_all[response_summ_all$gene %in% sig_genes_all,]
ggplot(response_summ_all_filt,aes(x=time,y=gene_metric))+
  geom_boxplot(aes(fill=response))+geom_violin(alpha=0.2,aes(fill=response))+
  facet_grid(treat~response)+
  add_pvalue(wilcox_test_val_all_first, label = "p.adj", remove.bracket = TRUE,y.position=0.35)+
  ylab("Splicing Antigenicity")+
  xlab("Treatment Time")+
  ylim(c(0,0.4))+
  labs(fill="Outlier Splicing Comparison")

wilcox_test_val_all <- compare_means(gene_metric ~ response,
                                     p.adjust.method = "BH", 
                                     data=response_summ_all_filt,
                                     group.by = c("treat","time"))
wilcox_test_val_all$y.position <- c(0.35, 0.3, 0.25,0.35, 0.3, 0.25,0.35, 0.3, 0.25,0.35, 0.3, 0.25,0.35, 0.3, 0.25,0.35, 0.3, 0.25)

ggplot(response_summ_all_filt,aes(x=response,y=gene_metric))+
  geom_boxplot(aes(fill=response))+geom_violin(alpha=0.2,aes(fill=response))+
  facet_grid(time~treat)+
  add_pvalue(wilcox_test_val_all, label = "p.adj", remove.bracket = TRUE)+
  ylab("Splicing Antigenicity")+
  xlab("Outlier Splicing Comparison")+
  ylim(c(0,0.4))+
  theme(axis.ticks.x=element_blank(),
        axis.text.x=element_blank())+
  labs(fill="Outlier Splicing Comparison")

Calculating effect sizes

resp <- c("CR_PR vs PD","SD vs PD","PD vs all responders")
treatment <- c("NIV1+IPI3","NIV3-NAIVE","NIV3-PROG")
response_summ_all_filt_PRE <- response_summ_all_filt[response_summ_all_filt$time=="PRE",]
response_summ_all_filt_POST <- response_summ_all_filt[response_summ_all_filt$time=="POST",]

responses <- data.frame(t(combn(resp,2)))
colnames(responses) <- c("response_1","response_2")
effect_sizes_PRE <- data.frame(vapply(treatment,function(treat){
  vapply(seq(nrow(responses)),function(row_val){
    resp_1 <- responses[row_val,1]
    resp_2 <- responses[row_val,2]
    response_summ_all_filt_small_1 <- response_summ_all_filt_PRE[response_summ_all_filt_PRE$resp %in% resp_1 & response_summ_all_filt_PRE$treat %in% treat,]
    response_summ_all_filt_small_2 <- response_summ_all_filt_PRE[response_summ_all_filt_PRE$resp %in% resp_2 & response_summ_all_filt_PRE$treat %in% treat,]
    
    sd_1 <- sd(response_summ_all_filt_small_1$gene_metric)
    mean_1 <- mean(response_summ_all_filt_small_1$gene_metric)
    mean_2 <- mean(response_summ_all_filt_small_2$gene_metric)
    return((mean_1-mean_2)/sd_1)
  },numeric(1))
},numeric(3)))
effect_sizes_PRE$time <- "PRE"
effect_sizes_PRE <- cbind(effect_sizes_PRE,responses)

effect_sizes_POST <- data.frame(vapply(treatment,function(treat){
  vapply(seq(nrow(responses)),function(row_val){
    resp_1 <- responses[row_val,1]
    resp_2 <- responses[row_val,2]
    response_summ_all_filt_small_1 <- response_summ_all_filt_POST[response_summ_all_filt_POST$resp %in% resp_1 & response_summ_all_filt_POST$treat %in% treat,]
    response_summ_all_filt_small_2 <- response_summ_all_filt_POST[response_summ_all_filt_POST$resp %in% resp_2 & response_summ_all_filt_POST$treat %in% treat,]
    
    sd_1 <- sd(response_summ_all_filt_small_1$gene_metric)
    mean_1 <- mean(response_summ_all_filt_small_1$gene_metric)
    mean_2 <- mean(response_summ_all_filt_small_2$gene_metric)
    return((mean_1-mean_2)/sd_1)
  },numeric(1))
},numeric(3)))
effect_sizes_POST$time <- "POST"
effect_sizes_POST <- cbind(effect_sizes_POST,responses)

effect_sizes <- rbind(effect_sizes_PRE,effect_sizes_POST)
datatable(effect_sizes,caption="Effect Sizes")
write.csv(effect_sizes,file="F:/Valsamo/analysis/supplement/effect_sizes_response.csv",quote=F)

effect_sizes_POST_PRE <- data.frame(vapply(treatment,function(treat){
  vapply(resp,function(resp_val){
    response_summ_all_filt_small_1 <- response_summ_all_filt_POST[response_summ_all_filt_POST$resp %in% resp_val & response_summ_all_filt_POST$treat %in% treat,]
    response_summ_all_filt_small_2 <- response_summ_all_filt_PRE[response_summ_all_filt_PRE$resp %in% resp_val & response_summ_all_filt_PRE$treat %in% treat,]
    
    sd_1 <- sd(response_summ_all_filt_small_1$gene_metric)
    mean_1 <- mean(response_summ_all_filt_small_1$gene_metric)
    mean_2 <- mean(response_summ_all_filt_small_2$gene_metric)
    return((mean_1-mean_2)/sd_1)
  },numeric(1))
},numeric(3)))
effect_sizes_POST_PRE$time_1 <- "POST"
effect_sizes_POST_PRE$time_2 <- "PRE"
effect_sizes_POST_PRE$response <- rownames(effect_sizes_POST_PRE)
datatable(effect_sizes_POST_PRE,caption="Effect Sizes")
write.csv(effect_sizes_POST_PRE,file="F:/Valsamo/analysis/supplement/effect_sizes_time.csv",quote=F)

Supplement: Looking at Pre and Post Data Per Comparison

comps <- unique(col_data$comp)
subject <- unique(col_data$subj_id)

per_samp_summary<-as.data.frame(t(vapply(subject,function(subj){
  subj_data <- col_data %>% dplyr::filter(subj_id == subj)
  if (nrow(subj_data) == 1){
    return(rep("-1",4))
  } else {
    subj_data <- subj_data[order(subj_data$time,decreasing=T),]
    target_comp_vals <- comp_vals_all[,subj_data$samples]
    samp_pre <- colnames(target_comp_vals)[which(str_detect(colnames(target_comp_vals),"PRE"))]
    samp_post <- colnames(target_comp_vals)[which(str_detect(colnames(target_comp_vals),"POST"))]
    comparisons <- col_data$comparisons[col_data$subj_id==subj]
    pre_comp <- comparisons[which(str_detect(comparisons,"PRE"))]
    post_comp <- comparisons[which(str_detect(comparisons,"POST"))]
    genes_small_pre <- comp_genes[[pre_comp]]
    genes_small_post <- comp_genes[[post_comp]]
    treatment <- col_data$treatment[col_data$subj_id==subj][1]
    response <- col_data$response[col_data$subj_id==subj][1]
    return(c(as.character(mean(target_comp_vals[genes_small_pre[genes_small_pre %in% sig_genes_all],samp_pre],na.rm=T)),
             as.character(mean(target_comp_vals[genes_small_post[genes_small_post %in% sig_genes_all],samp_post],na.rm=T)),
             treatment,response))
  }
},character(4))))
colnames(per_samp_summary)<-c("PRE","POST","treatment","response")
per_samp_summary$PRE <- as.numeric(per_samp_summary$PRE)
per_samp_summary$POST <- as.numeric(per_samp_summary$POST)
per_samp_summary <- per_samp_summary %>% dplyr::filter(PRE != -1)
per_samp_summary_linear <- data.frame(rep(rownames(per_samp_summary),2),
                                      c(per_samp_summary$PRE,per_samp_summary$POST),
                                      c(rep("PRE",nrow(per_samp_summary)),rep("POST",nrow(per_samp_summary))),
                                      rep(per_samp_summary$treatment,2),
                                      rep(per_samp_summary$response,2))
colnames(per_samp_summary_linear) <- c("sample","splicing_antigenicity","time","treatment","response")
per_samp_summary_linear <- per_samp_summary_linear %>% dplyr::filter(response != "NE")

per_samp_summary_linear_NIV1_IPI3<-per_samp_summary_linear %>% dplyr::filter(treatment=="NIV1+IPI3")
my_comparisons <- list(c("POST", "PRE"))
ggplot(per_samp_summary_linear_NIV1_IPI3,aes(x=time,y=splicing_antigenicity,color=response))+
  geom_point()+
  geom_line(aes(group=sample))+
  geom_boxplot(aes(x=time,y=splicing_antigenicity),fill="black",color="black",alpha=0.2)+
  stat_compare_means(method = "t.test",comparisons = my_comparisons,paired = T)+
  labs(title="NIV1+IPI3")+xlab("Treatment Time")

per_samp_summary_linear_NIV3_NAIVE<-per_samp_summary_linear %>% dplyr::filter(treatment=="NIV3-NAIVE")
my_comparisons <- list(c("POST", "PRE"))
ggplot(per_samp_summary_linear_NIV3_NAIVE,aes(x=time,y=splicing_antigenicity,color=response))+
  geom_point()+
  geom_line(aes(group=sample))+
  geom_boxplot(aes(x=time,y=splicing_antigenicity),fill="black",color="black",alpha=0.2)+
  stat_compare_means(method = "t.test",comparisons = my_comparisons,paired = T)+
  labs(title="NIV3-NAIVE")+xlab("Treatment Time")

per_samp_summary_linear_NIV3_PROG<-per_samp_summary_linear %>% dplyr::filter(treatment=="NIV3-PROG")
my_comparisons <- list(c("POST", "PRE"))
ggplot(per_samp_summary_linear_NIV3_PROG,aes(x=time,y=splicing_antigenicity,color=response))+
  geom_point()+
  geom_line(aes(group=sample))+
  geom_boxplot(aes(x=time,y=splicing_antigenicity),fill="black",color="black",alpha=0.2)+
  stat_compare_means(method = "t.test",comparisons = my_comparisons,paired = T)+
  labs(title="NIV3-PROG")+xlab("Treatment Time")

Looking at Pre and Post data per sample and gene

# using comp_vals_all
# using col_data

splicing_antigenicity_PRE_POST <- data.frame(t(rep(NA,7)))
colnames(splicing_antigenicity_PRE_POST)<-c("genes","PRE","POST","PRE_ov_POST","subject","response","treatment")
# rownames(splicing_antigenicity_PRE_POST) <- splicing_antigenicity_PRE_POST$genes
# splicing_antigenicity_PRE_POST$PRE_ov_POST <- NA
subjects <- unique(col_data$subj_id)
for (i in seq(length(subjects))){
  subject <- subjects[i]
  a <- which(col_data$subj_id==subject)
  if (length(a)>1){
    col_data_small <- col_data[a,]
    PRE_samp <- col_data_small[which(col_data_small$time=="PRE"),"samples"]
    PRE_comp <- col_data_small[which(col_data_small$time=="PRE"),"comparisons"]
    PRE_genes <- comp_genes[[PRE_comp]][comp_genes[[PRE_comp]] %in% sig_genes_all]
    PRE_response <- col_data_small[which(col_data_small$time=="PRE"),"response"]
    PRE_treatment <- col_data_small[which(col_data_small$time=="PRE"),"treatment"]
    PRE_splicing_antigenicity <- comp_vals_all[PRE_genes,PRE_samp]
    POST_samp <- col_data_small[which(col_data_small$time=="POST"),"samples"]
    POST_comp <- col_data_small[which(col_data_small$time=="POST"),"comparisons"]
    POST_genes <- comp_genes[[POST_comp]][comp_genes[[POST_comp]] %in% sig_genes_all]
    POST_response <- col_data_small[which(col_data_small$time=="POST"),"response"]
    POST_treatment <- col_data_small[which(col_data_small$time=="POST"),"treatment"]
    POST_splicing_antigenicity <- comp_vals_all[POST_genes,POST_samp]
    
    splicing_antigenicity_PRE_POST_filler <- data.frame(genes=unique(c(PRE_genes,POST_genes)))
    rownames(splicing_antigenicity_PRE_POST_filler)<-splicing_antigenicity_PRE_POST_filler$genes
    splicing_antigenicity_PRE_POST_filler[PRE_genes,"PRE"] <- PRE_splicing_antigenicity
    splicing_antigenicity_PRE_POST_filler[POST_genes,"POST"] <- POST_splicing_antigenicity
    splicing_antigenicity_PRE_POST_filler$PRE_ov_POST <- vapply(seq(nrow(splicing_antigenicity_PRE_POST_filler)),function(row_val){
      PRE_val <- splicing_antigenicity_PRE_POST_filler[row_val,"PRE"]
      POST_val <- splicing_antigenicity_PRE_POST_filler[row_val,"POST"]
      if (is.na(PRE_val) & is.na(POST_val)){
        return(1)
      } else if (is.na(PRE_val)){
        if(POST_val==0){
          return(1)
        } else {
          return(POST_val)
        }
      } else if (is.na(POST_val)){
        if(PRE_val==0){
          return(1)
        } else {
          return(PRE_val)
        }
      } else if (PRE_val==0 & POST_val==0){
        return(1)
      } else if (PRE_val==0 & POST_val != 0){
        return(POST_val)
      } else if (PRE_val!=0 & POST_val ==0){
        return(PRE_val)
      } else {
        return(PRE_val/POST_val)
      }
    },numeric(1))
    splicing_antigenicity_PRE_POST_filler$subject <- subject
    splicing_antigenicity_PRE_POST_filler$response <- PRE_response
    splicing_antigenicity_PRE_POST_filler$treatment <- PRE_treatment
    splicing_antigenicity_PRE_POST <- rbind(splicing_antigenicity_PRE_POST,splicing_antigenicity_PRE_POST_filler)
  }
}
splicing_antigenicity_PRE_POST <- splicing_antigenicity_PRE_POST[seq(2,nrow(splicing_antigenicity_PRE_POST)),]
splicing_antigenicity_PRE_POST$POST_ov_PRE <- 1/splicing_antigenicity_PRE_POST$PRE_ov_POST

Fig. 8: boxplots

splicing_antigenicity_PRE_POST <- splicing_antigenicity_PRE_POST[splicing_antigenicity_PRE_POST$response!="NE",]

splicing_antigenicity_PRE_POST <- splicing_antigenicity_PRE_POST[order(splicing_antigenicity_PRE_POST$response,splicing_antigenicity_PRE_POST$subject),]
splicing_antigenicity_PRE_POST$subject <- factor(splicing_antigenicity_PRE_POST$subject,levels=unique(splicing_antigenicity_PRE_POST$subject))
splicing_antigenicity_PRE_POST$response <- vapply(splicing_antigenicity_PRE_POST$response,function(respo){
  if (respo=="CR_PR"){
    return("CR_PR vs PD")
  } else if(respo=="SD"){
    return("SD vs PD")
  } else if (respo=="PD"){
    return("PD vs all responders")
  }
},character(1))
splicing_antigenicity_PRE_POST$response <- factor(splicing_antigenicity_PRE_POST$response,levels=c("CR_PR vs PD","SD vs PD","PD vs all responders"))

wilcox_test_val_DA <- compare_means(POST_ov_PRE ~ response, p.adjust.method = "BH", data=splicing_antigenicity_PRE_POST)
wilcox_test_val_DA_resp <- compare_means(POST_ov_PRE ~ response, p.adjust.method = "BH", data=splicing_antigenicity_PRE_POST,group.by=c("treatment"))

datatable(wilcox_test_val_DA,caption="Differential Agretopicity All Treatments")
datatable(wilcox_test_val_DA_resp,caption="Differential Agretopicity By Treatment")
# splicing_antigenicity_PRE_POST$resp <- splicing_antigenicity_PRE_POST$response
ggplot(splicing_antigenicity_PRE_POST,aes(y=log2(POST_ov_PRE),x=subject))+
  geom_boxplot(aes(fill=response))+geom_violin(alpha=0.2,aes(fill=response))+
  # facet_grid(treatment~.)+
  geom_hline(yintercept=0)+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank())+
  ylab("log2(POST SA / PRE SA)")

wilcox_test_val_DA$y.position <- c(11.6,10.8,10)
ggplot(splicing_antigenicity_PRE_POST,aes(y=log2(POST_ov_PRE),x=response))+
  geom_boxplot(aes(fill=response))+geom_violin(alpha=0.2,aes(fill=response))+
  # facet_grid(treatment~.)+
  geom_hline(yintercept=0)+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank())+
  ylab("log2(POST SA / PRE SA)")+
  add_pvalue(wilcox_test_val_DA, label = "p.adj", remove.bracket = TRUE)

splicing_antigenicity_PRE_POST_NIV1_IPI3 <- splicing_antigenicity_PRE_POST[splicing_antigenicity_PRE_POST$treatment=="NIV1+IPI3",]
splicing_antigenicity_PRE_POST_NIV3_NAIVE <- splicing_antigenicity_PRE_POST[splicing_antigenicity_PRE_POST$treatment=="NIV3-NAIVE",]
splicing_antigenicity_PRE_POST_NIV3_PROG <- splicing_antigenicity_PRE_POST[splicing_antigenicity_PRE_POST$treatment=="NIV3-PROG",]

# splicing_antigenicity_PRE_POST_NIV1_IPI3 <- splicing_antigenicity_PRE_POST_NIV1_IPI3[order(splicing_antigenicity_PRE_POST_NIV1_IPI3$response,splicing_antigenicity_PRE_POST_NIV1_IPI3$subject),]
# splicing_antigenicity_PRE_POST_NIV1_IPI3$subject <- factor(splicing_antigenicity_PRE_POST_NIV1_IPI3$subject,levels=unique(splicing_antigenicity_PRE_POST_NIV1_IPI3$subject))
ggplot(splicing_antigenicity_PRE_POST_NIV1_IPI3,aes(y=log2(POST_ov_PRE),x=subject))+
  geom_boxplot(aes(fill=response))+geom_violin(alpha=0.2,aes(fill=response))+
  geom_hline(yintercept=0)+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank())+
  ylim(c(min(log2(splicing_antigenicity_PRE_POST$POST_ov_PRE)),max(log2(splicing_antigenicity_PRE_POST$POST_ov_PRE))))+
  labs(title="NIV1+IPI3")+
  ylab("log2(POST SA / PRE SA)")

wilcox_test_val_DA_resp_NIV1_IPI3 <- wilcox_test_val_DA_resp[wilcox_test_val_DA_resp$treatment=="NIV1+IPI3",]
wilcox_test_val_DA_resp_NIV1_IPI3$y.position <- c(11.6,10.8,10)
ggplot(splicing_antigenicity_PRE_POST_NIV1_IPI3,aes(y=log2(POST_ov_PRE),x=response))+
  geom_boxplot(aes(fill=response))+geom_violin(alpha=0.2,aes(fill=response))+
  # facet_grid(treatment~.)+
  geom_hline(yintercept=0)+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank())+
  labs(title="NIV1+IPI3")+
  ylab("log2(POST SA / PRE SA)")+
  add_pvalue(wilcox_test_val_DA_resp_NIV1_IPI3, label = "p.adj", remove.bracket = TRUE)

# splicing_antigenicity_PRE_POST_NIV3_NAIVE <- splicing_antigenicity_PRE_POST_NIV3_NAIVE[order(splicing_antigenicity_PRE_POST_NIV3_NAIVE$response,splicing_antigenicity_PRE_POST_NIV3_NAIVE$subject),]
# splicing_antigenicity_PRE_POST_NIV3_NAIVE$subject <- factor(splicing_antigenicity_PRE_POST_NIV3_NAIVE$subject,levels=unique(splicing_antigenicity_PRE_POST_NIV3_NAIVE$subject))
ggplot(splicing_antigenicity_PRE_POST_NIV3_NAIVE,aes(y=log2(POST_ov_PRE),x=subject,fill=response))+
  geom_boxplot()+geom_violin(alpha=0.2)+
  geom_hline(yintercept=0)+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank())+
  ylim(c(min(log2(splicing_antigenicity_PRE_POST$POST_ov_PRE)),max(log2(splicing_antigenicity_PRE_POST$POST_ov_PRE))))+
  labs(title="NIV3 NAIVE")+
  ylab("log2(POST SA / PRE SA)")

wilcox_test_val_DA_resp_NIV3_NAIVE <- wilcox_test_val_DA_resp[wilcox_test_val_DA_resp$treatment=="NIV3-NAIVE",]
wilcox_test_val_DA_resp_NIV3_NAIVE$y.position <- c(11.6,10.8,10)
ggplot(splicing_antigenicity_PRE_POST_NIV3_NAIVE,aes(y=log2(POST_ov_PRE),x=response))+
  geom_boxplot(aes(fill=response))+geom_violin(alpha=0.2,aes(fill=response))+
  # facet_grid(treatment~.)+
  geom_hline(yintercept=0)+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank())+
  labs(title="NIV3-NAIVE")+
  ylab("log2(POST SA / PRE SA)")+
  add_pvalue(wilcox_test_val_DA_resp_NIV3_NAIVE, label = "p.adj", remove.bracket = TRUE)

splicing_antigenicity_PRE_POST_NIV3_PROG <- splicing_antigenicity_PRE_POST_NIV3_PROG[order(splicing_antigenicity_PRE_POST_NIV3_PROG$response,splicing_antigenicity_PRE_POST_NIV3_PROG$subject),]
# splicing_antigenicity_PRE_POST_NIV3_PROG$subject <- factor(splicing_antigenicity_PRE_POST_NIV3_PROG$subject,levels=unique(splicing_antigenicity_PRE_POST_NIV3_PROG$subject))
ggplot(splicing_antigenicity_PRE_POST_NIV3_PROG,aes(y=log2(POST_ov_PRE),x=subject))+
  geom_boxplot(aes(fill=response))+geom_violin(alpha=0.2,aes(fill=response))+
  geom_hline(yintercept=0)+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank())+
  ylim(c(min(log2(splicing_antigenicity_PRE_POST$POST_ov_PRE)),max(log2(splicing_antigenicity_PRE_POST$POST_ov_PRE))))+
  labs(title="NIV3 PROG")+
  ylab("log2(POST SA / PRE SA)")

wilcox_test_val_DA_resp_NIV3_PROG <- wilcox_test_val_DA_resp[wilcox_test_val_DA_resp$treatment=="NIV3-PROG",]
wilcox_test_val_DA_resp_NIV3_PROG$y.position <- c(11.6,10.8,10)
splicing_antigenicity_PRE_POST_NIV3_PROG$resp <- splicing_antigenicity_PRE_POST_NIV3_PROG$response
ggplot(splicing_antigenicity_PRE_POST_NIV3_PROG,aes(y=log2(POST_ov_PRE),x=response))+
  geom_boxplot(aes(fill=response))+geom_violin(alpha=0.2,aes(fill=response))+
  # facet_grid(treatment~.)+
  geom_hline(yintercept=0)+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank())+
  labs(title="NIV3-PROG")+
  ylab("log2(POST SA / PRE SA)")+
  add_pvalue(wilcox_test_val_DA_resp_NIV3_PROG, label = "p.adj", remove.bracket = TRUE)

Effect Sizes

resp <- c("CR_PR vs PD","SD vs PD","PD vs all responders")
treatment <- unique(splicing_antigenicity_PRE_POST$treatment)
responses <- data.frame(t(combn(resp,2)))
colnames(responses) <- c("response_1","response_2")
effect_sizes_POST_PRE <- data.frame(vapply(treatment,function(treat){
  print(treat)
  vapply(seq(nrow(responses)),function(row_val){
    resp_1 <- responses[row_val,1]
    resp_2 <- responses[row_val,2]
    splicing_antigenicity_PRE_POST_small_1 <- splicing_antigenicity_PRE_POST[splicing_antigenicity_PRE_POST$resp %in% resp_1 & splicing_antigenicity_PRE_POST$treatment %in% treat,]
    splicing_antigenicity_PRE_POST_small_2 <- splicing_antigenicity_PRE_POST[splicing_antigenicity_PRE_POST$resp %in% resp_2 & splicing_antigenicity_PRE_POST$treatment %in% treat,]
    
    sd_1 <- sd(splicing_antigenicity_PRE_POST_small_1$POST_ov_PRE)
    mean_1 <- mean(splicing_antigenicity_PRE_POST_small_1$POST_ov_PRE)
    mean_2 <- mean(splicing_antigenicity_PRE_POST_small_2$POST_ov_PRE)
    return((mean_1-mean_2)/sd_1)
  },numeric(1))
},numeric(3)))
## [1] "NIV3-NAIVE"
## [1] "NIV3-PROG"
## [1] "NIV1+IPI3"
effect_sizes_POST_PRE <- cbind(effect_sizes_POST_PRE,responses)
datatable(effect_sizes_POST_PRE,caption="Effect Sizes")
write.csv(effect_sizes_POST_PRE,file="F:/Valsamo/analysis/supplement/effect_sizes_DA.csv",quote=F)

effect_sizes_POST_PRE <- data.frame(vapply(seq(nrow(responses)),function(row_val){
  resp_1 <- responses[row_val,1]
  resp_2 <- responses[row_val,2]
  splicing_antigenicity_PRE_POST_small_1 <- splicing_antigenicity_PRE_POST[splicing_antigenicity_PRE_POST$resp %in% resp_1,]
  splicing_antigenicity_PRE_POST_small_2 <- splicing_antigenicity_PRE_POST[splicing_antigenicity_PRE_POST$resp %in% resp_2,]
  
  sd_1 <- sd(splicing_antigenicity_PRE_POST_small_1$POST_ov_PRE)
  mean_1 <- mean(splicing_antigenicity_PRE_POST_small_1$POST_ov_PRE)
  mean_2 <- mean(splicing_antigenicity_PRE_POST_small_2$POST_ov_PRE)
  return((mean_1-mean_2)/sd_1)
},numeric(1)))

effect_sizes_POST_PRE <- cbind(effect_sizes_POST_PRE,responses)
colnames(effect_sizes_POST_PRE) <- c("effect_size",colnames(responses))
datatable(effect_sizes_POST_PRE,caption="Effect Sizes")
write.csv(effect_sizes_POST_PRE,file="F:/Valsamo/analysis/supplement/effect_sizes_DA_all.csv",quote=F)

Cibersort data

reloading manifest data

Loading in cibersort data

cibersort_data <- read.table(mod_path("/mnt/f/Valsamo/cibersort/valsamo_cibersort_09062022.txt"),header=T,sep="\t")

col_data_filt <- col_data[col_data$response!="NE",]

# cibersort_data_filt <- cibersort_data[cibersort_data$P.value<=0.05,]
cibersort_data_filt <- cibersort_data

# cibersort_samples <- cibersort_data$Mixture
# cibersort_cell_types <- 

for (treat in unique(col_data_filt$treatment)){
  
  col_data_all <- col_data_filt[col_data_filt$treatment==treat,]
  cibersort_data_all <- cibersort_data[cibersort_data$Mixture %in% col_data_all$sample,,drop=F]
  rownames(cibersort_data_all) <- cibersort_data_all$Mixture
  cibersort_data_all <- cibersort_data_all[,seq(2,ncol(cibersort_data_all)-3)]
  cibersort_data_all <- as.data.frame(t(cibersort_data_all))
  col_data_all <- col_data_all[col_data_all$sample %in% colnames(cibersort_data_all),]
  cibersort_data_all <- cibersort_data_all[,col_data_all$sample]
  
  if (ncol(cibersort_data_all)<=1){
    print(sprintf("%s, all fail",treat))
  } else {
      print(Heatmap(t(apply(cibersort_data_all[apply(cibersort_data_all,1,sd)>0,],1,scale)),
            top_annotation = HeatmapAnnotation(treatment=col_data_all$treatment, response=col_data_all$response,time=col_data_all$time,
                                               col=list(response=c("CR_PR"="red","SD"="green","PD"="blue"),
                                                treatment=c("NIV1+IPI3"="red","NIV3-NAIVE"="green","NIV3-PROG"="blue"),
                                                time=c("POST"="green","PRE"="purple"))),
            show_row_names=T,
            show_column_names = F,
            cluster_rows=T,
            cluster_columns=T,
            heatmap_legend_param=list(title="all"),
    clustering_method_rows="ward.D2",
    clustering_method_columns="ward.D2"))

      print(Heatmap(cibersort_data_all,
            top_annotation = HeatmapAnnotation(treatment=col_data_all$treatment, response=col_data_all$response,time=col_data_all$time,
                                               col=list(response=c("CR_PR"="red","SD"="green","PD"="blue"),
                                                treatment=c("NIV1+IPI3"="red","NIV3-NAIVE"="green","NIV3-PROG"="blue"),
                                                time=c("POST"="green","PRE"="purple"))),
            show_row_names=T,
            show_column_names = F,
            cluster_rows=T,
            cluster_columns=T,
            heatmap_legend_param=list(title="all"),
    clustering_method_rows="ward.D2",
    clustering_method_columns="ward.D2")) 
  }
  

  col_data_PRE <- col_data_filt[col_data_filt$time=="PRE" & col_data_filt$treatment==treat,]
  cibersort_data_PRE <- cibersort_data_filt[cibersort_data_filt$Mixture %in% col_data_PRE$sample,,drop=F]
  rownames(cibersort_data_PRE) <- cibersort_data_PRE$Mixture
  cibersort_data_PRE <- cibersort_data_PRE[,seq(2,ncol(cibersort_data_PRE)-3)]
  cibersort_data_PRE <- as.data.frame(t(cibersort_data_PRE))
  col_data_PRE <- col_data_PRE[col_data_PRE$sample %in% colnames(cibersort_data_PRE),]
  cibersort_data_PRE <- cibersort_data_PRE[,col_data_PRE$sample,drop=F]
  
  if (ncol(cibersort_data_PRE)<=1){
    print(sprintf("%s, PRE fail",treat))
  } else {
    print(Heatmap(t(apply(cibersort_data_PRE[apply(cibersort_data_PRE,1,sd)>0,],1,scale)),
          top_annotation = HeatmapAnnotation(treatment=col_data_PRE$treatment, response=col_data_PRE$response,
                                             col=list(response=c("CR_PR"="red","SD"="green","PD"="blue"),
                                                treatment=c("NIV1+IPI3"="red","NIV3-NAIVE"="green","NIV3-PROG"="blue"))),
          show_row_names=T,
          show_column_names = F,
          cluster_rows=T,
          cluster_columns=T,
          heatmap_legend_param=list(title="PRE"),
  clustering_method_rows="ward.D2",
  clustering_method_columns="ward.D2"))

    print(Heatmap(cibersort_data_PRE,
          top_annotation = HeatmapAnnotation(treatment=col_data_PRE$treatment, response=col_data_PRE$response,
                                             col=list(response=c("CR_PR"="red","SD"="green","PD"="blue"),
                                                treatment=c("NIV1+IPI3"="red","NIV3-NAIVE"="green","NIV3-PROG"="blue"))),
          show_row_names=T,
          show_column_names = F,
          cluster_rows=T,
          cluster_columns=T,
          heatmap_legend_param=list(title="PRE"),
  clustering_method_rows="ward.D2",
  clustering_method_columns="ward.D2")) 
  }
  
  col_data_POST <- col_data_filt[col_data_filt$time=="POST" & col_data_filt$treatment==treat,]
  cibersort_data_POST <- cibersort_data[cibersort_data$Mixture %in% col_data_POST$sample,,drop=F]
  rownames(cibersort_data_POST) <- cibersort_data_POST$Mixture
  cibersort_data_POST <- cibersort_data_POST[,seq(2,ncol(cibersort_data_POST)-3)]
  cibersort_data_POST <- as.data.frame(t(cibersort_data_POST))
  col_data_POST <- col_data_POST[col_data_POST$sample %in% colnames(cibersort_data_POST),]
  cibersort_data_POST <- cibersort_data_POST[,col_data_POST$sample]
  
  if (ncol(cibersort_data_POST)<=1){
    print(sprintf("%s, POST fail",treat))
  } else {
      print(Heatmap(t(apply(cibersort_data_POST[apply(cibersort_data_POST,1,sd)>0,],1,scale)),
            top_annotation = HeatmapAnnotation(treatment=col_data_POST$treatment, response=col_data_POST$response,
                                               col=list(response=c("CR_PR"="red","SD"="green","PD"="blue"),
                                                treatment=c("NIV1+IPI3"="red","NIV3-NAIVE"="green","NIV3-PROG"="blue"))),
            show_row_names=T,
            show_column_names = F,
            cluster_rows=T,
            cluster_columns=T,
            heatmap_legend_param=list(title="POST"),
    clustering_method_rows="ward.D2",
    clustering_method_columns="ward.D2")) 
    
      print(Heatmap(cibersort_data_POST,
            top_annotation = HeatmapAnnotation(treatment=col_data_POST$treatment, response=col_data_POST$response,
                                               col=list(response=c("CR_PR"="red","SD"="green","PD"="blue"),
                                                treatment=c("NIV1+IPI3"="red","NIV3-NAIVE"="green","NIV3-PROG"="blue"))),
            show_row_names=T,
            show_column_names = F,
            cluster_rows=T,
            cluster_columns=T,
            heatmap_legend_param=list(title="POST"),
    clustering_method_rows="ward.D2",
    clustering_method_columns="ward.D2"))
  }
}
## Warning: The input is a data frame-like object, convert it to a matrix.

## Warning: The input is a data frame-like object, convert it to a matrix.

## Warning: The input is a data frame-like object, convert it to a matrix.

## Warning: The input is a data frame-like object, convert it to a matrix.

## Warning: The input is a data frame-like object, convert it to a matrix.

## Warning: The input is a data frame-like object, convert it to a matrix.

## Warning: The input is a data frame-like object, convert it to a matrix.

## Warning: The input is a data frame-like object, convert it to a matrix.

## Warning: The input is a data frame-like object, convert it to a matrix.

## Linear cibersort analysis

prop <- as.numeric(cibersort_data_filt[1,seq(2,ncol(cibersort_data_filt)-3)])
cell_types <- colnames(cibersort_data_filt)[seq(2,ncol(cibersort_data_filt)-3)]
cibersort_linear <- data.frame(proportions=prop,
                               cell_type=cell_types,
                               sample=rep(cibersort_data_filt$Mixture[1],length(cell_types)))
cibersort_logical <- vapply(seq(2,nrow(cibersort_data_filt)),function(row_val){
  prop <- as.numeric(cibersort_data_filt[row_val,seq(2,ncol(cibersort_data_filt)-3)])
  cibersort_linear <<- rbind(cibersort_linear,data.frame(proportions=prop,
                               cell_type=cell_types,
                               sample=rep(cibersort_data_filt$Mixture[row_val],length(cell_types))))
  return(T)
},logical(1))

cibersort_linear$subject <- vapply(cibersort_linear$sample,function(sample){
  unique(col_data$subj_id[col_data$sample==sample])
},character(1))

cibersort_linear$treatment <- vapply(cibersort_linear$sample,function(sample){
  unique(col_data$treatment[col_data$sample==sample])
},character(1))

cibersort_linear$time <- vapply(cibersort_linear$sample,function(sample){
  unique(col_data$time[col_data$sample==sample])
},character(1))

cibersort_linear$response <- vapply(cibersort_linear$sample,function(sample){
  unique(col_data$response[col_data$sample==sample])
},character(1))

wilcox_test_val_time <- compare_means(proportions ~ time, p.adjust.method = "BH", data=cibersort_linear,group.by = c("response","treatment","cell_type"))

wilcox_test_val_response <- compare_means(proportions ~ response, p.adjust.method = "BH", data=cibersort_linear,group.by = c("time","treatment","cell_type"))

wilcox_test_val_treatment <- compare_means(proportions ~ treatment, p.adjust.method = "BH", data=cibersort_linear,group.by = c("time","response","cell_type"))

wilcox_test_val <- compare_means(proportions ~ response, p.adjust.method = "BH", data=cibersort_linear,group.by = c("time","cell_type"))

cibersort_linear$cell_type <- factor(cibersort_linear$cell_type,levels=cell_types)

label_y_val <- max(cibersort_linear$proportions)

cibersort_linear_NIV1_IPI3 <- cibersort_linear[cibersort_linear$treatment=="NIV1+IPI3",]
cibersort_linear_NIV1_IPI3_CR_PR <- cibersort_linear[cibersort_linear$treatment=="NIV1+IPI3" & cibersort_linear$response=="CR_PR",]
cibersort_linear_NIV1_IPI3_SD <- cibersort_linear[cibersort_linear$treatment=="NIV1+IPI3" & cibersort_linear$response=="SD",]
cibersort_linear_NIV1_IPI3_PD <- cibersort_linear[cibersort_linear$treatment=="NIV1+IPI3" & cibersort_linear$response=="PD",]

cibersort_linear_NIV1_IPI3$cell_type <- factor(cibersort_linear_NIV1_IPI3$cell_type,levels=cell_types)
# ggplot(cibersort_linear_NIV1_IPI3,aes(x=sample,y=proportions,group=time,color="black",fill=cell_type))+
#   geom_bar(stat="identity",color="black")+
#   theme(axis.text.x=element_blank())+
#   facet_grid(~time)

wilcox_test_val <- compare_means(proportions ~ time, p.adjust.method = "BH", data=cibersort_linear_NIV1_IPI3_CR_PR,group.by = c("cell_type"))
cibersort_linear_NIV1_IPI3_CR_PR$time <- factor(cibersort_linear_NIV1_IPI3_CR_PR$time,levels=c("PRE","POST"))
ggplot(cibersort_linear_NIV1_IPI3_CR_PR,aes(x=cell_type,y=proportions))+
  geom_boxplot(aes(fill=time))+
  # geom_signif(comparisons = list(cell_types))+
  # add_pvalue(wilcox_test_val, label = "p.signif", remove.bracket = TRUE,y.position=0.5)+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  stat_compare_means(aes(group = time), method="wilcox.test",label = "p.signif",label.y=label_y_val)+
  labs(title=sprintf("NIV1+IPI3,CR_PR"))

wilcox_test_val <- compare_means(proportions ~ time, p.adjust.method = "BH", data=cibersort_linear_NIV1_IPI3_SD,group.by = c("cell_type"))
cibersort_linear_NIV1_IPI3_SD$time <- factor(cibersort_linear_NIV1_IPI3_SD$time,levels=c("PRE","POST"))
ggplot(cibersort_linear_NIV1_IPI3_SD,aes(x=cell_type,y=proportions))+
  geom_boxplot(aes(fill=time))+
  # geom_signif(comparisons = list(cell_types))+
  # add_pvalue(wilcox_test_val, label = "p.signif", remove.bracket = TRUE,y.position=0.5)+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  stat_compare_means(aes(group = time), method="wilcox.test",label = "p.signif",label.y=label_y_val)+
  labs(title=sprintf("NIV1+IPI3,SD"))

wilcox_test_val <- compare_means(proportions ~ time, p.adjust.method = "BH", data=cibersort_linear_NIV1_IPI3_PD,group.by = c("cell_type"))
cibersort_linear_NIV1_IPI3_PD$time <- factor(cibersort_linear_NIV1_IPI3_PD$time,levels=c("PRE","POST"))
ggplot(cibersort_linear_NIV1_IPI3_PD,aes(x=cell_type,y=proportions))+
  geom_boxplot(aes(fill=time))+
  # geom_signif(comparisons = list(cell_types))+
  # add_pvalue(wilcox_test_val, label = "p.signif", remove.bracket = TRUE,y.position=0.5)+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  stat_compare_means(aes(group = time), method="wilcox.test",label = "p.signif",label.y=label_y_val)+
  labs(title=sprintf("NIV1+IPI3,PD"))

cibersort_linear_NIV3_PROG <- cibersort_linear[cibersort_linear$treatment=="NIV3-PROG",]
cibersort_linear_NIV3_PROG_CR_PR <- cibersort_linear[cibersort_linear$treatment=="NIV3-PROG" & cibersort_linear$response=="CR_PR",]
cibersort_linear_NIV3_PROG_SD <- cibersort_linear[cibersort_linear$treatment=="NIV3-PROG" & cibersort_linear$response=="SD",]
cibersort_linear_NIV3_PROG_PD <- cibersort_linear[cibersort_linear$treatment=="NIV3-PROG" & cibersort_linear$response=="PD",]

cibersort_linear_NIV3_PROG <- cibersort_linear[cibersort_linear$treatment=="NIV3-PROG",]
cibersort_linear_NIV3_PROG$cell_type <- factor(cibersort_linear_NIV3_PROG$cell_type,levels=cell_types)
# ggplot(cibersort_linear_NIV3_PROG,aes(x=sample,y=proportions,group=time,color="black",fill=cell_type))+
#   geom_bar(stat="identity",color="black")+
#   theme(axis.text.x=element_blank())+
#   facet_grid(~time)

wilcox_test_val <- compare_means(proportions ~ time, p.adjust.method = "BH", data=cibersort_linear_NIV3_PROG_CR_PR,group.by = c("cell_type"))
cibersort_linear_NIV3_PROG_CR_PR$time <- factor(cibersort_linear_NIV3_PROG_CR_PR$time,levels=c("PRE","POST"))
ggplot(cibersort_linear_NIV3_PROG_CR_PR,aes(x=cell_type,y=proportions))+
  geom_boxplot(aes(fill=time))+
  # geom_signif(comparisons = list(cell_types))+
  # add_pvalue(wilcox_test_val, label = "p.signif", remove.bracket = TRUE,y.position=0.5)+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  stat_compare_means(aes(group = time), method="wilcox.test",label = "p.signif",label.y=label_y_val)+
  labs(title=sprintf("NIV3-PROG,CR_PR"))

wilcox_test_val <- compare_means(proportions ~ time, p.adjust.method = "BH", data=cibersort_linear_NIV3_PROG_SD,group.by = c("cell_type"))
cibersort_linear_NIV3_PROG_SD$time <- factor(cibersort_linear_NIV3_PROG_SD$time,levels=c("PRE","POST"))
ggplot(cibersort_linear_NIV3_PROG_SD,aes(x=cell_type,y=proportions))+
  geom_boxplot(aes(fill=time))+
  # geom_signif(comparisons = list(cell_types))+
  # add_pvalue(wilcox_test_val, label = "p.signif", remove.bracket = TRUE,y.position=0.5)+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  stat_compare_means(aes(group = time), method="wilcox.test",label = "p.signif",label.y=label_y_val)+
  labs(title=sprintf("NIV3-PROG,SD"))

wilcox_test_val <- compare_means(proportions ~ time, p.adjust.method = "BH", data=cibersort_linear_NIV3_PROG_PD,group.by = c("cell_type"))
cibersort_linear_NIV3_PROG_PD$time <- factor(cibersort_linear_NIV3_PROG_PD$time,levels=c("PRE","POST"))
ggplot(cibersort_linear_NIV3_PROG_PD,aes(x=cell_type,y=proportions))+
  geom_boxplot(aes(fill=time))+
  # geom_signif(comparisons = list(cell_types))+
  # add_pvalue(wilcox_test_val, label = "p.signif", remove.bracket = TRUE,y.position=0.5)+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  stat_compare_means(aes(group = time), method="wilcox.test",label = "p.signif",label.y=label_y_val)+
  labs(title=sprintf("NIV3-PROG,PD"))

cibersort_linear_NIV3_NAIVE <- cibersort_linear[cibersort_linear$treatment=="NIV3-NAIVE",]
cibersort_linear_NIV3_NAIVE_CR_PR <- cibersort_linear[cibersort_linear$treatment=="NIV3-NAIVE" & cibersort_linear$response=="CR_PR",]
cibersort_linear_NIV3_NAIVE_SD <- cibersort_linear[cibersort_linear$treatment=="NIV3-NAIVE" & cibersort_linear$response=="SD",]
cibersort_linear_NIV3_NAIVE_PD <- cibersort_linear[cibersort_linear$treatment=="NIV3-NAIVE" & cibersort_linear$response=="PD",]

cibersort_linear_NIV3_NAIVE <- cibersort_linear[cibersort_linear$treatment=="NIV3-NAIVE",]
cibersort_linear_NIV3_NAIVE$cell_type <- factor(cibersort_linear_NIV3_NAIVE$cell_type,levels=cell_types)
# ggplot(cibersort_linear_NIV3_NAIVE,aes(x=sample,y=proportions,group=time,color="black",fill=cell_type))+
#   geom_bar(stat="identity",color="black")+
#   theme(axis.text.x=element_blank())+
#   facet_grid(~time)

wilcox_test_val <- compare_means(proportions ~ time, p.adjust.method = "BH", data=cibersort_linear_NIV3_NAIVE_CR_PR,group.by = c("cell_type"))
cibersort_linear_NIV3_NAIVE_CR_PR$time <- factor(cibersort_linear_NIV3_NAIVE_CR_PR$time,levels=c("PRE","POST"))
ggplot(cibersort_linear_NIV3_NAIVE_CR_PR,aes(x=cell_type,y=proportions))+
  geom_boxplot(aes(fill=time))+
  # geom_signif(comparisons = list(cell_types))+
  # add_pvalue(wilcox_test_val, label = "p.signif", remove.bracket = TRUE,y.position=0.5)+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  stat_compare_means(aes(group = time), method="wilcox.test",label = "p.signif",label.y=label_y_val)+
  labs(title=sprintf("NIV3-NAIVE,CR_PR"))

wilcox_test_val <- compare_means(proportions ~ time, p.adjust.method = "BH", data=cibersort_linear_NIV3_NAIVE_SD,group.by = c("cell_type"))
cibersort_linear_NIV3_NAIVE_SD$time <- factor(cibersort_linear_NIV3_NAIVE_SD$time,levels=c("PRE","POST"))
ggplot(cibersort_linear_NIV3_NAIVE_SD,aes(x=cell_type,y=proportions))+
  geom_boxplot(aes(fill=time))+
  # geom_signif(comparisons = list(cell_types))+
  # add_pvalue(wilcox_test_val, label = "p.signif", remove.bracket = TRUE,y.position=0.5)+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  stat_compare_means(aes(group = time), method="wilcox.test",label = "p.signif",label.y=label_y_val)+
  labs(title=sprintf("NIV3-NAIVE,SD"))

wilcox_test_val <- compare_means(proportions ~ time, p.adjust.method = "BH", data=cibersort_linear_NIV3_NAIVE_PD,group.by = c("cell_type"))
cibersort_linear_NIV3_NAIVE_PD$time <- factor(cibersort_linear_NIV3_NAIVE_PD$time,levels=c("PRE","POST"))
ggplot(cibersort_linear_NIV3_NAIVE_PD,aes(x=cell_type,y=proportions))+
  geom_boxplot(aes(fill=time))+
  # geom_signif(comparisons = list(cell_types))+
  # add_pvalue(wilcox_test_val, label = "p.signif", remove.bracket = TRUE,y.position=0.5)+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  stat_compare_means(aes(group = time), method="wilcox.test",label = "p.signif",label.y=label_y_val)+
  labs(title=sprintf("NIV3-NAIVE,PD"))